Bump fastify from 5.6.2 to 5.8.3 #73
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pull-request test pipeline. Changes confined to packages/docs do not trigger it.
name: Tests

on:
  pull_request:
    # Label events are included so that adding/removing a label re-runs the workflow.
    types:
      - opened
      - synchronize
      - labeled
      - unlabeled
    paths-ignore:
      - "packages/docs/**"

# Token scopes granted to every job unless a job overrides them.
# The cancel-after-lint-failure job POSTs to the Actions "cancel run" endpoint.
permissions:
  contents: read
  actions: write

# Environment variables visible to every job in this workflow.
env:
  BROWSERBASE_FLOW_LOGS: "1"
  LLM_MAX_MS: "15000"
  EVAL_MODELS: "openai/gpt-4.1,google/gemini-2.0-flash,anthropic/claude-haiku-4-5"
  EVAL_AGENT_MODELS: "computer-use-preview-2025-03-11,claude-sonnet-4-6"
  EVAL_CATEGORIES: "observe,act,combination,extract,targeted_extract,agent"
  EVAL_MAX_CONCURRENCY: 25
  EVAL_TRIAL_COUNT: 3
  LOCAL_SESSION_LIMIT_PER_E2E_TEST: 2
  BROWSERBASE_SESSION_LIMIT_PER_E2E_TEST: 3
  BROWSERBASE_REGION_DISTRIBUTION: "us-west-2=30,us-east-1=30,eu-central-1=20,ap-southeast-1=20" # percentage of load for each region when running e2e tests against prod
  CHROME_PATH: /usr/bin/chromium # GitHub Actions runners ship with stable Chromium by default
  BROWSERBASE_CDP_CONNECT_MAX_MS: "10000"
  BROWSERBASE_SESSION_CREATE_MAX_MS: "60000"
  PUPPETEER_SKIP_DOWNLOAD: "1"
  PLAYWRIGHT_SKIP_DOWNLOAD: "1"
  TURBO_TELEMETRY_DISABLED: "1"

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Classifies the changed paths into per-area booleans that later jobs gate on.
determine-changes:
  runs-on: ubuntu-latest
  outputs:
    core: ${{ steps.filter.outputs.core }}
    cli: ${{ steps.filter.outputs.cli }}
    evals: ${{ steps.filter.outputs.evals }}
    server: ${{ steps.filter.outputs.server }}
    docs-only: ${{ steps.filter.outputs.docs-only }}
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    # Prints the rate-limit response headers and body, and fails the job when
    # the remaining quota reported by the API is exactly 0.
    - name: Log GitHub API rate limit
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        headers_file=$(mktemp)
        body_file=$(mktemp)
        curl -sSL \
          -D "$headers_file" \
          -o "$body_file" \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          -H "Authorization: Bearer $GITHUB_TOKEN" \
          https://api.github.com/rate_limit
        cat "$headers_file"
        echo ""
        cat "$body_file"
        remaining=$(jq -r '.rate.remaining' "$body_file")
        if [ "$remaining" -eq 0 ]; then
          reset_epoch=$(jq -r '.rate.reset' "$body_file")
          reset_utc=$(date -u -d "@$reset_epoch" +"%Y-%m-%d %H:%M:%S")
          reset_pacific=$(TZ=America/Los_Angeles date -d "@$reset_epoch" +"%Y-%m-%d %H:%M:%S %Z")
          echo "Github API rate limited until: ${reset_pacific} (${reset_utc} UTC)" >> "$GITHUB_STEP_SUMMARY"
          echo "GitHub API rate limit exhausted."
          exit 1
        fi

    # Each filter name below becomes a step output that is re-exported above.
    - uses: dorny/paths-filter@v3
      id: filter
      with:
        filters: |
          core:
            - '.github/workflows/ci.yml'
            - 'packages/core/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
            - 'turbo.json'
          cli:
            - 'packages/cli/**'
            - 'packages/core/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
          evals:
            - 'packages/evals/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
          server:
            - 'packages/server-v3/**'
            - 'packages/server-v4/**'
            - 'packages/core/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
            - 'pnpm-workspace.yaml'
            - '.github/workflows/ci.yml'
          docs-only:
            - '**/*.md'
            - 'examples/**'
            - '!packages/**/*.md'
# Decides whether evals run at all and, if so, which categories, based on PR
# labels and the path-filter results. Emits:
#   skip-all-evals  - "true" or "false"
#   eval-categories - JSON array of category names (may be "[]")
determine-evals:
  runs-on: ubuntu-latest
  needs:
    - determine-changes
  outputs:
    skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }}
    eval-categories: ${{ steps.check-labels.outputs.eval-categories }}
  steps:
    - id: check-labels
      run: |
        categories=()
        declare -A seen

        # Append a category once; repeated names are ignored.
        add_category() {
          local category="$1"
          if [[ -z "${seen[$category]:-}" ]]; then
            categories+=("$category")
            seen["$category"]=1
          fi
        }

        # Serialize the collected categories as a JSON array step output.
        emit_categories() {
          local json="["
          for category in "${categories[@]}"; do
            json+="\"${category}\","
          done
          json="${json%,}"
          json+="]"
          echo "eval-categories=$json" >> $GITHUB_OUTPUT
        }

        # Check if skip-evals label is present
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then
          echo "skip-evals label found - skipping all evals"
          echo "skip-all-evals=true" >> $GITHUB_OUTPUT
          emit_categories
          exit 0
        fi

        # Skip evals if only docs/examples changed
        if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" ]]; then
          echo "Only docs/examples changed - skipping evals"
          echo "skip-all-evals=true" >> $GITHUB_OUTPUT
          emit_categories
          exit 0
        fi

        # Check for skip-regression-evals label
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-regression-evals') }}" == "true" ]]; then
          echo "skip-regression-evals label found - regression evals will be skipped"
        else
          echo "Regression evals will run by default"
          add_category "regression"
        fi

        # Check for specific labels
        echo "skip-all-evals=false" >> $GITHUB_OUTPUT
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" == "true" ]]; then
          add_category "combination"
        fi
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" == "true" ]]; then
          add_category "extract"
        fi
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'act') }}" == "true" ]]; then
          add_category "act"
        fi
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'observe') }}" == "true" ]]; then
          add_category "observe"
        fi
        # The label uses a hyphen; the category name uses an underscore.
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'targeted-extract') }}" == "true" ]]; then
          add_category "targeted_extract"
        fi
        if [[ "${{ contains(github.event.pull_request.labels.*.name, 'agent') }}" == "true" ]]; then
          add_category "agent"
        fi
        emit_categories
# Lints the workspace through turbo, reusing the artifacts produced by run-build.
run-lint:
  name: Lint
  needs:
    - run-build
  runs-on: ubuntu-latest
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        node-version: 20.x
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - name: Run Lint
      run: pnpm exec turbo run lint
# Runs only when run-lint failed, and asks the API to cancel this workflow run.
# continue-on-error keeps a failed cancel request from adding a second failure.
cancel-after-lint-failure:
  name: Cancel after lint failure
  needs:
    - run-lint
  runs-on: ubuntu-latest
  # always() is required so the condition is evaluated even though a needed job failed.
  if: ${{ always() && needs.run-lint.result == 'failure' }}
  continue-on-error: true
  steps:
    - name: Cancel workflow run
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        curl -sSfL -X POST \
          -H "Authorization: Bearer ${GITHUB_TOKEN}" \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/cancel"
# Builds the workspace once, saves the turbo cache, and publishes the build
# outputs as a short-lived artifact named "build-artifacts".
run-build:
  name: Build
  runs-on: ubuntu-latest
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        node-version: 20.x
        use-prebuilt-artifacts: "false"

    - name: Run Build
      run: pnpm exec turbo run build

    # Saved even when the build step fails (if: always()).
    - name: Save Turbo cache
      if: always()
      uses: actions/cache/save@v4
      with:
        path: .turbo
        key: ${{ runner.os }}-turbo-${{ hashFiles('pnpm-lock.yaml', 'pnpm-workspace.yaml', 'package.json', 'turbo.json') }}-${{ github.sha }}

    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: build-artifacts
        retention-days: 1
        include-hidden-files: true
        # package.json is included to anchor artifact paths at repo root.
        path: |
          package.json
          packages/core/dist/**
          packages/core/lib/version.ts
          packages/core/lib/dom/build/**
          packages/core/lib/v3/dom/build/**
          packages/cli/dist/**
          packages/evals/dist/**
          packages/server-v3/dist/**
          packages/server-v3/openapi.v3.yaml
          packages/server-v4/dist/**
          packages/server-v4/openapi.v4.yaml
# Runs the CLI test task; skipped unless the "cli" path filter matched.
run-cli-tests:
  name: CLI Tests
  needs:
    - run-build
    - determine-changes
  if: needs.determine-changes.outputs.cli == 'true'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - name: Run CLI Tests
      run: pnpm exec turbo run test:cli --filter=@browserbasehq/browse-cli
# Lists the core test files and publishes them as a JSON matrix for
# core-unit-tests. Skipped unless the "core" path filter matched.
discover-core-tests:
  needs:
    - determine-changes
  if: needs.determine-changes.outputs.core == 'true'
  runs-on: ubuntu-latest
  outputs:
    core-tests: ${{ steps.set-matrix.outputs.core-tests }}
    has-core-tests: ${{ steps.set-matrix.outputs.has-core-tests }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "false"
        restore-turbo-cache: "false"

    # has-core-tests is "false" only when the listing is exactly "[]".
    - name: Discover core test files
      id: set-matrix
      run: |
        core_json=$(pnpm --filter @browserbasehq/stagehand --silent run test:core -- --list)
        echo "core-tests=$core_json" >> $GITHUB_OUTPUT
        if [ "$core_json" = "[]" ]; then
          echo "has-core-tests=false" >> $GITHUB_OUTPUT
        else
          echo "has-core-tests=true" >> $GITHUB_OUTPUT
        fi
        echo "Found core tests: $core_json"
| core-unit-tests: | |
| name: core/${{ matrix.test.name }} | |
| runs-on: ubuntu-latest | |
| needs: [run-build, discover-core-tests] | |
| if: needs.discover-core-tests.outputs.has-core-tests == 'true' | |
| env: | |
| STAGEHAND_BROWSER_TARGET: local | |
| STAGEHAND_SERVER_TARGET: local | |
| strategy: | |
| fail-fast: false | |
| max-parallel: 100 | |
| matrix: | |
| test: ${{ fromJson(needs.discover-core-tests.outputs.core-tests) }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| fetch-depth: 1 | |
| - uses: ./.github/actions/setup-node-pnpm-turbo | |
| with: | |
| use-prebuilt-artifacts: "true" | |
| restore-turbo-cache: "false" | |
| - name: Run Vitest - ${{ matrix.test.name }} | |
| run: | | |
| pnpm exec turbo run test:core --only --filter=@browserbasehq/stagehand -- "${{ matrix.test.path }}" | |
| - uses: ./.github/actions/upload-ctrf-report | |
| if: always() | |
| with: | |
| name: ctrf/core-unit/${{ matrix.test.name }}.json | |
| - uses: ./.github/actions/upload-v8-coverage | |
| if: always() | |
| with: | |
| name: coverage/core-unit/${{ matrix.test.name }} | |
# Lists the server-v3 integration test files and publishes them as a JSON
# matrix for server-integration-tests. Skipped unless the "server" filter matched.
discover-server-tests:
  needs:
    - determine-changes
  if: needs.determine-changes.outputs.server == 'true'
  runs-on: ubuntu-latest
  outputs:
    integration-tests: ${{ steps.set-matrix.outputs.integration-tests }}
    has-integration-tests: ${{ steps.set-matrix.outputs.has-integration-tests }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "false"
        restore-turbo-cache: "false"

    # has-integration-tests is "false" only when the listing is exactly "[]".
    - name: Discover server test files
      id: set-matrix
      run: |
        int_json=$(pnpm --filter @browserbasehq/stagehand-server-v3 --silent run test:server -- --list integration)
        echo "integration-tests=$int_json" >> $GITHUB_OUTPUT
        if [ "$int_json" = "[]" ]; then
          echo "has-integration-tests=false" >> $GITHUB_OUTPUT
        else
          echo "has-integration-tests=true" >> $GITHUB_OUTPUT
        fi
        echo "Found server integration tests: $int_json"
# Calls the reusable SEA-build workflow for server-v3 with a seven-entry
# OS/arch matrix. The last entry is the linux-x64 build with source maps,
# which is the binary that server-integration-tests downloads.
build-server-sea:
  name: Build SEA binary (tests, v3)
  needs:
    - run-build
  uses: ./.github/workflows/stagehand-server-v3-sea-build.yml
  with:
    node-version: "20.x"
    use-prebuilt-artifacts: "true"
    restore-turbo-cache: "false"
    upload-only-binary: stagehand-server-v3-linux-x64-sourcemap
    matrix: |
      [
        {"os":"ubuntu-latest","platform":"linux","arch":"x64","binary_name":"stagehand-server-v3-linux-x64","include_sourcemaps":false},
        {"os":"ubuntu-24.04-arm","platform":"linux","arch":"arm64","binary_name":"stagehand-server-v3-linux-arm64","include_sourcemaps":false},
        {"os":"macos-15","platform":"darwin","arch":"arm64","binary_name":"stagehand-server-v3-darwin-arm64","include_sourcemaps":false},
        {"os":"macos-15-intel","platform":"darwin","arch":"x64","binary_name":"stagehand-server-v3-darwin-x64","include_sourcemaps":false},
        {"os":"windows-latest","platform":"win32","arch":"x64","binary_name":"stagehand-server-v3-win32-x64.exe","include_sourcemaps":false},
        {"os":"windows-11-arm","platform":"win32","arch":"arm64","binary_name":"stagehand-server-v3-win32-arm64.exe","include_sourcemaps":false},
        {"os":"ubuntu-latest","platform":"linux","arch":"x64","binary_name":"stagehand-server-v3-linux-x64-sourcemap","include_sourcemaps":true}
      ]
# Runs each discovered server-v3 integration test file as its own matrix job
# against the SEA binary built by build-server-sea, then uploads reports.
server-integration-tests:
  name: server/v3/integration/${{ matrix.test.name }}
  runs-on: ubuntu-latest
  needs: [build-server-sea, discover-server-tests, run-build]
  if: needs.discover-server-tests.outputs.has-integration-tests == 'true'
  strategy:
    fail-fast: false
    matrix:
      test: ${{ fromJson(needs.discover-server-tests.outputs.integration-tests) }}
  env:
    BB_ENV: local
    STAGEHAND_BASE_URL: http://stagehand-api.localhost:3106
    STAGEHAND_BROWSER_TARGET: local
    STAGEHAND_SERVER_TARGET: sea
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    # Used only for testing /start with env: BROWSERBASE remote browser
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - name: Download SEA binary
      uses: actions/download-artifact@v4
      with:
        name: stagehand-server-v3-linux-x64-sourcemap
        path: .

    # Fails fast if the artifact did not unpack to the expected path.
    - name: Ensure SEA binary is present and executable
      shell: bash
      run: |
        set -euo pipefail
        test -f packages/server-v3/dist/sea/stagehand-server-v3-linux-x64-sourcemap
        chmod +x packages/server-v3/dist/sea/stagehand-server-v3-linux-x64-sourcemap

    - name: Run server integration test - ${{ matrix.test.name }}
      env:
        SEA_BINARY_NAME: stagehand-server-v3-linux-x64-sourcemap
        # The path is handed to bash as data. Splicing the expression into the
        # script text would let quotes, `$` or backticks in a file name be
        # interpreted as shell syntax.
        MATRIX_TEST_PATH: ${{ matrix.test.path }}
      run: |
        pnpm exec turbo run test:server --only --filter=@browserbasehq/stagehand-server-v3 -- "$MATRIX_TEST_PATH"

    # Reports are uploaded even when the test step fails.
    - uses: ./.github/actions/upload-ctrf-report
      if: always()
      with:
        name: ctrf/server-v3-integration/${{ matrix.test.name }}.json

    - uses: ./.github/actions/upload-v8-coverage
      if: always()
      with:
        name: coverage/server-v3-integration/${{ matrix.test.name }}
# Lists the e2e test files and publishes them as a JSON matrix consumed by both
# e2e jobs. Skipped unless the "core" path filter matched.
discover-e2e-tests:
  needs:
    - determine-changes
  if: needs.determine-changes.outputs.core == 'true'
  runs-on: ubuntu-latest
  outputs:
    e2e-tests: ${{ steps.set-matrix.outputs.e2e-tests }}
    has-e2e-tests: ${{ steps.set-matrix.outputs.has-e2e-tests }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "false"
        restore-turbo-cache: "false"

    # has-e2e-tests is "false" only when the listing is exactly "[]".
    - name: Discover e2e test files
      id: set-matrix
      run: |
        e2e_json=$(pnpm --filter @browserbasehq/stagehand --silent run test:e2e -- --list)
        echo "e2e-tests=$e2e_json" >> $GITHUB_OUTPUT
        if [ "$e2e_json" = "[]" ]; then
          echo "has-e2e-tests=false" >> $GITHUB_OUTPUT
        else
          echo "has-e2e-tests=true" >> $GITHUB_OUTPUT
        fi
        echo "Found e2e tests: $e2e_json"
# Runs each discovered e2e test file against a local browser. Restricted to
# pull requests whose head branch lives in this repository (not forks).
run-e2e-local-tests:
  name: e2e/local/${{ matrix.test.name }}
  needs: [run-build, discover-e2e-tests]
  runs-on: ubuntu-latest
  timeout-minutes: 50
  if: >
    needs.discover-e2e-tests.outputs.has-e2e-tests == 'true' &&
    github.event.pull_request.head.repo.full_name == github.repository
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
    HEADLESS: true
    STAGEHAND_BROWSER_TARGET: local
    STAGEHAND_SERVER_TARGET: local
  strategy:
    fail-fast: false
    max-parallel: 20
    matrix:
      test: ${{ fromJson(needs.discover-e2e-tests.outputs.e2e-tests) }}
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - uses: ./.github/actions/verify-chromium-launch

    - name: Run local E2E Tests - ${{ matrix.test.name }}
      env:
        # The path is handed to bash as data. Splicing the expression into the
        # script text would let quotes, `$` or backticks in a file name be
        # interpreted as shell syntax.
        MATRIX_TEST_PATH: ${{ matrix.test.path }}
      run: |
        pnpm exec turbo run test:e2e --only --filter=@browserbasehq/stagehand -- "$MATRIX_TEST_PATH"

    # Reports are uploaded even when the test step fails.
    - uses: ./.github/actions/upload-ctrf-report
      if: always()
      with:
        name: ctrf/e2e-local/${{ matrix.test.name }}.json

    - uses: ./.github/actions/upload-v8-coverage
      if: always()
      with:
        name: coverage/e2e-local/${{ matrix.test.name }}
# Runs each discovered e2e test file with the browser target set to
# browserbase. Restricted to pull requests whose head branch lives in this
# repository (not forks).
run-e2e-bb-tests:
  name: e2e/bb/${{ matrix.test.name }}
  needs: [run-build, discover-e2e-tests]
  runs-on: ubuntu-latest
  timeout-minutes: 50
  if: >
    needs.discover-e2e-tests.outputs.has-e2e-tests == 'true' &&
    github.event.pull_request.head.repo.full_name == github.repository
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
    HEADLESS: true
    STAGEHAND_BROWSER_TARGET: browserbase
    STAGEHAND_SERVER_TARGET: local
  strategy:
    fail-fast: false
    max-parallel: 100
    matrix:
      test: ${{ fromJson(needs.discover-e2e-tests.outputs.e2e-tests) }}
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - name: Select Browserbase region
      uses: ./.github/actions/select-browserbase-region
      with:
        distribution: ${{ env.BROWSERBASE_REGION_DISTRIBUTION }}

    - name: Run E2E Tests (browserbase) - ${{ matrix.test.name }}
      env:
        # The path is handed to bash as data. Splicing the expression into the
        # script text would let quotes, `$` or backticks in a file name be
        # interpreted as shell syntax.
        MATRIX_TEST_PATH: ${{ matrix.test.path }}
      run: |
        pnpm exec turbo run test:e2e --only --filter=@browserbasehq/stagehand -- "$MATRIX_TEST_PATH"

    # Reports are uploaded even when the test step fails.
    - uses: ./.github/actions/upload-ctrf-report
      if: always()
      with:
        name: ctrf/e2e-bb/${{ matrix.test.name }}.json

    - uses: ./.github/actions/upload-v8-coverage
      if: always()
      with:
        name: coverage/e2e-bb/${{ matrix.test.name }}
# Runs one eval category per matrix job and fails when the category score in
# eval-summary.json is below 80.
#
# The `if` uses always() so the job still runs when run-e2e-bb-tests was
# skipped; it is blocked only when that job failed or was cancelled, when
# evals are disabled, or when no categories were selected.
run-evals:
  name: evals/${{ matrix.category }}
  needs: [run-build, determine-evals, run-e2e-bb-tests]
  if: >-
    ${{
      always() &&
      needs.run-build.result == 'success' &&
      needs.determine-evals.result == 'success' &&
      needs.run-e2e-bb-tests.result != 'failure' &&
      needs.run-e2e-bb-tests.result != 'cancelled' &&
      needs.determine-evals.outputs.skip-all-evals != 'true' &&
      needs.determine-evals.outputs.eval-categories != '[]'
    }}
  runs-on: ubuntu-latest
  timeout-minutes: 90
  strategy:
    fail-fast: false
    matrix:
      category: ${{ fromJson(needs.determine-evals.outputs.eval-categories) }}
  env:
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
    BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
    BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
    BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
    STAGEHAND_BROWSER_TARGET: browserbase
    STAGEHAND_SERVER_TARGET: local
  steps:
    - name: Check out repository code
      uses: actions/checkout@v4

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    - name: Select Browserbase region
      uses: ./.github/actions/select-browserbase-region
      with:
        distribution: ${{ env.BROWSERBASE_REGION_DISTRIBUTION }}

    # Runs the evals, tees the log to a temp file, extracts the block between
    # the SUMMARY banner and the "Evaluation summary written to" line into the
    # summary_text step output, and exits with the eval command's own status.
    - name: Run Evals - ${{ matrix.category }}
      id: run-evals
      env:
        NODE_V8_COVERAGE: coverage/evals/${{ matrix.category }}
        # Passed as data rather than spliced into the script text.
        MATRIX_CATEGORY: ${{ matrix.category }}
      run: |
        log_file="$(mktemp)"
        # Errexit is suspended so the summary can still be captured after a failing run.
        set +e
        pnpm exec turbo run test:evals --only --filter=@browserbasehq/stagehand-evals -- "$MATRIX_CATEGORY" -t "${EVAL_TRIAL_COUNT}" -c "${EVAL_MAX_CONCURRENCY}" 2>&1 | tee "$log_file"
        eval_status=${PIPESTATUS[0]}
        set -e
        summary_block="$(
          awk '
            /^=========================SUMMARY=========================$/ { capture=1 }
            capture { print }
            /^Evaluation summary written to / { capture=0 }
          ' "$log_file"
        )"
        if [ -n "$summary_block" ]; then
          # A per-run delimiter: a constant one (e.g. "EOF") would truncate the
          # output if the captured log ever contained that line.
          delimiter="EVAL_SUMMARY_$(date +%s%N)_${RANDOM}"
          {
            echo "summary_text<<${delimiter}"
            echo "$summary_block"
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"
        fi
        exit "$eval_status"

    # Copies the captured summary into the job summary, then enforces the
    # score threshold read from eval-summary.json.
    - name: Log Evals Performance - ${{ matrix.category }}
      env:
        EVAL_STDOUT_SUMMARY: ${{ steps.run-evals.outputs.summary_text }}
        MATRIX_CATEGORY: ${{ matrix.category }}
      run: |
        if [ -n "${EVAL_STDOUT_SUMMARY:-}" ]; then
          echo "### Evals Summary (${MATRIX_CATEGORY})" >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          printf '%s\n' "$EVAL_STDOUT_SUMMARY" >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
        fi
        # Check for the file before any jq call: reading a missing file would
        # abort the step under `bash -e` and skip the explanatory message.
        if [ ! -f eval-summary.json ]; then
          echo "Eval summary not found for ${MATRIX_CATEGORY} category. Failing CI."
          exit 1
        fi
        experimentName=$(jq -r '.experimentName' eval-summary.json)
        echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
        category_score=$(jq --arg category "$MATRIX_CATEGORY" '.categories[$category]' eval-summary.json)
        echo "${MATRIX_CATEGORY} category score: $category_score%"
        if (( $(echo "$category_score < 80" | bc -l) )); then
          echo "${MATRIX_CATEGORY} category score is below 80%. Failing CI."
          exit 1
        fi

    # Reports are uploaded even when an earlier step fails.
    - uses: ./.github/actions/upload-ctrf-report
      if: always()
      with:
        name: ctrf/evals/${{ matrix.category }}.json

    - uses: ./.github/actions/upload-v8-coverage
      if: always()
      with:
        name: coverage/evals/${{ matrix.category }}
# Merges V8 coverage and CTRF reports from the test jobs, publishes a summary,
# and posts a non-blocking "Measured coverage" commit status.
# Currently disabled via `if: false`.
merge-coverage:
  name: Code Coverage Report
  runs-on: ubuntu-latest
  needs:
    - core-unit-tests
    - run-e2e-local-tests
    - run-e2e-bb-tests
    - run-evals
    - server-integration-tests
  # if: always()
  if: false
  # Job-level permissions replace the workflow-level block for this job, so the
  # existing scopes are repeated. statuses: write is required by the
  # "Set coverage status" step; without it the POST is rejected and the
  # failure is hidden by continue-on-error.
  permissions:
    contents: read
    actions: write
    statuses: write
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1

    - uses: ./.github/actions/setup-node-pnpm-turbo
      with:
        use-prebuilt-artifacts: "true"
        restore-turbo-cache: "false"

    # Missing artifacts are tolerated (continue-on-error) because upstream jobs
    # may have been skipped.
    - name: Download V8 coverage artifacts
      uses: actions/download-artifact@v4
      continue-on-error: true
      with:
        pattern: coverage-*
        path: .
        merge-multiple: true

    - name: Download CTRF artifacts
      uses: actions/download-artifact@v4
      continue-on-error: true
      with:
        pattern: ctrf-*
        path: .
        merge-multiple: true

    - name: Generate merged coverage report
      run: |
        pnpm run coverage:merge

    - name: Upload merged coverage report
      if: always()
      id: upload-coverage-artifact
      uses: actions/upload-artifact@v4
      with:
        name: coverage-merged
        # package.json is included to anchor artifact paths at repo root.
        path: |
          package.json
          coverage/merged

    - name: Add coverage summary to job summary
      if: always()
      shell: bash
      run: |
        echo "### Code Coverage" >> "$GITHUB_STEP_SUMMARY"
        echo "" >> "$GITHUB_STEP_SUMMARY"
        if [ -f coverage/merged/coverage-summary.txt ]; then
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          cat coverage/merged/coverage-summary.txt >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
        else
          echo "Coverage summary not available." >> "$GITHUB_STEP_SUMMARY"
        fi
        if [ -n "${{ steps.upload-coverage-artifact.outputs.artifact-url }}" ]; then
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "[Download full HTML coverage report](${{ steps.upload-coverage-artifact.outputs.artifact-url }})" >> "$GITHUB_STEP_SUMMARY"
        fi

    - name: Publish merged CTRF report
      if: always()
      uses: ctrf-io/github-test-reporter@v1
      with:
        report-path: './ctrf/**/*.json'
        summary: true
        summary-report: false
        summary-delta-report: true
        test-report: false
        failed-report: false
        insights-report: true
        flaky-rate-report: true
        fail-rate-report: true
        slowest-report: true
        previous-results-report: true
        fetch-previous-results: true
        baseline: 1
        previous-results-max: 1
        max-workflow-runs-to-check: 5
        max-previous-runs-to-fetch: 1
        upload-artifact: true
        artifact-name: ctrf-report-merged
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Sums the failed-test counts across all CTRF files and reads the "Lines"
    # percentage from the merged coverage summary; both default to 0.
    - name: Compute coverage status metrics
      if: always()
      id: coverage-status
      shell: bash
      run: |
        set -euo pipefail
        shopt -s globstar nullglob
        tests_failed=0
        ctrf_files=(ctrf/**/*.json)
        if [ "${#ctrf_files[@]}" -gt 0 ]; then
          tests_failed=$(jq -s '[.[].results.summary.failed // 0] | add' "${ctrf_files[@]}")
        fi
        total_coverage=0
        if [ -f coverage/merged/coverage-summary.txt ]; then
          total_coverage=$(awk '/^Lines/ {gsub(/%/,"",$3); print $3}' coverage/merged/coverage-summary.txt)
        fi
        echo "tests_failed=${tests_failed}" >> "$GITHUB_OUTPUT"
        echo "total_coverage=${total_coverage}" >> "$GITHUB_OUTPUT"

    # Always posts state "success": the status is informational only.
    - name: Set coverage status
      if: always()
      continue-on-error: true
      shell: bash
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        RUN_ID: ${{ github.run_id }}
        PULL_NUMBER: ${{ github.event.pull_request.number }}
        # Empty on non-pull_request events; the script then falls back to GITHUB_SHA.
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        TESTS_FAILED: ${{ steps.coverage-status.outputs.tests_failed }}
        TOTAL_COVERAGE: ${{ steps.coverage-status.outputs.total_coverage }}
      run: |
        set -euo pipefail
        repo="${GITHUB_REPOSITORY}"
        # On pull_request events GITHUB_SHA is the temporary merge commit, and a
        # status attached to it never shows on the PR. Target the head commit.
        sha="${HEAD_SHA:-$GITHUB_SHA}"
        tests_failed="${TESTS_FAILED:-0}"
        total_coverage="${TOTAL_COVERAGE:-0}"
        state="success"
        # NOTE(review): RUN_ID is the workflow run id, but it is passed as
        # check_run_id below - confirm this link resolves as intended.
        if [ -n "${PULL_NUMBER:-}" ]; then
          target_url="https://github.com/${repo}/pull/${PULL_NUMBER}/checks?check_run_id=${RUN_ID}"
        else
          target_url="https://github.com/${repo}/actions/runs/${RUN_ID}"
        fi
        description="non-blocking report: ${tests_failed} tests failed. ${total_coverage}% coverage"
        payload=$(jq -n \
          --arg state "$state" \
          --arg target_url "$target_url" \
          --arg description "$description" \
          --arg context "Measured coverage" \
          '{state: $state, target_url: $target_url, description: $description, context: $context}')
        curl -sSfL -X POST \
          -H "Authorization: Bearer ${GITHUB_TOKEN}" \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/${repo}/statuses/${sha}" \
          -d "$payload"