Tests

Bump fastify from 5.6.2 to 5.8.3 #73

Workflow file for this run

	name: Tests

	on:
	pull_request:
	types:
	- opened
	- synchronize
	- labeled
	- unlabeled
	paths-ignore:
	- "packages/docs/**"

	permissions:
	contents: read
	actions: write

	env:
	BROWSERBASE_FLOW_LOGS: "1"
	LLM_MAX_MS: "15000"
	EVAL_MODELS: "openai/gpt-4.1,google/gemini-2.0-flash,anthropic/claude-haiku-4-5"
	EVAL_AGENT_MODELS: "computer-use-preview-2025-03-11,claude-sonnet-4-6"
	EVAL_CATEGORIES: "observe,act,combination,extract,targeted_extract,agent"
	EVAL_MAX_CONCURRENCY: 25
	EVAL_TRIAL_COUNT: 3
	LOCAL_SESSION_LIMIT_PER_E2E_TEST: 2
	BROWSERBASE_SESSION_LIMIT_PER_E2E_TEST: 3
	BROWSERBASE_REGION_DISTRIBUTION: "us-west-2=30,us-east-1=30,eu-central-1=20,ap-southeast-1=20" # percentage of load for each region when running e2e tests against prod
	CHROME_PATH: /usr/bin/chromium # GitHub Actions runners ship with stable Chromium by default
	BROWSERBASE_CDP_CONNECT_MAX_MS: "10000"
	BROWSERBASE_SESSION_CREATE_MAX_MS: "60000"
	PUPPETEER_SKIP_DOWNLOAD: "1"
	PLAYWRIGHT_SKIP_DOWNLOAD: "1"
	TURBO_TELEMETRY_DISABLED: "1"

	concurrency:
	group: ${{ github.workflow }}-${{ github.ref }}
	cancel-in-progress: true

	jobs:
	determine-changes:
	runs-on: ubuntu-latest
	outputs:
	core: ${{ steps.filter.outputs.core }}
	cli: ${{ steps.filter.outputs.cli }}
	evals: ${{ steps.filter.outputs.evals }}
	server: ${{ steps.filter.outputs.server }}
	docs-only: ${{ steps.filter.outputs.docs-only }}
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- name: Log GitHub API rate limit
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	run: \|
	headers_file=$(mktemp)
	body_file=$(mktemp)
	curl -sSL \
	-D "$headers_file" \
	-o "$body_file" \
	-H "Accept: application/vnd.github+json" \
	-H "X-GitHub-Api-Version: 2022-11-28" \
	-H "Authorization: Bearer $GITHUB_TOKEN" \
	https://api.github.com/rate_limit
	cat "$headers_file"
	echo ""
	cat "$body_file"
	remaining=$(jq -r '.rate.remaining' "$body_file")
	if [ "$remaining" -eq 0 ]; then
	reset_epoch=$(jq -r '.rate.reset' "$body_file")
	reset_utc=$(date -u -d "@$reset_epoch" +"%Y-%m-%d %H:%M:%S")
	reset_pacific=$(TZ=America/Los_Angeles date -d "@$reset_epoch" +"%Y-%m-%d %H:%M:%S %Z")
	echo "Github API rate limited until: ${reset_pacific} (${reset_utc} UTC)" >> "$GITHUB_STEP_SUMMARY"
	echo "GitHub API rate limit exhausted."
	exit 1
	fi

	- uses: dorny/paths-filter@v3
	id: filter
	with:
	filters: \|
	core:
	- '.github/workflows/ci.yml'
	- 'packages/core/**'
	- 'package.json'
	- 'pnpm-lock.yaml'
	- 'turbo.json'
	cli:
	- 'packages/cli/**'
	- 'packages/core/**'
	- 'package.json'
	- 'pnpm-lock.yaml'
	evals:
	- 'packages/evals/**'
	- 'package.json'
	- 'pnpm-lock.yaml'
	server:
	- 'packages/server-v3/**'
	- 'packages/server-v4/**'
	- 'packages/core/**'
	- 'package.json'
	- 'pnpm-lock.yaml'
	- 'pnpm-workspace.yaml'
	- '.github/workflows/ci.yml'
	docs-only:
	- '*/.md'
	- 'examples/**'
	- '!packages/*/.md'

	determine-evals:
	needs: [determine-changes]
	runs-on: ubuntu-latest
	outputs:
	skip-all-evals: ${{ steps.check-labels.outputs.skip-all-evals }}
	eval-categories: ${{ steps.check-labels.outputs.eval-categories }}
	steps:
	- id: check-labels
	run: \|
	categories=()
	declare -A seen
	add_category() {
	local category="$1"
	if [[ -z "${seen[$category]:-}" ]]; then
	categories+=("$category")
	seen["$category"]=1
	fi
	}

	emit_categories() {
	local json="["
	for category in "${categories[@]}"; do
	json+="\"${category}\","
	done
	json="${json%,}"
	json+="]"
	echo "eval-categories=$json" >> $GITHUB_OUTPUT
	}

	# Check if skip-evals label is present
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-evals') }}" == "true" ]]; then
	echo "skip-evals label found - skipping all evals"
	echo "skip-all-evals=true" >> $GITHUB_OUTPUT
	emit_categories
	exit 0
	fi

	# Skip evals if only docs/examples changed
	if [[ "${{ needs.determine-changes.outputs.docs-only }}" == "true" && "${{ needs.determine-changes.outputs.core }}" == "false" && "${{ needs.determine-changes.outputs.evals }}" == "false" ]]; then
	echo "Only docs/examples changed - skipping evals"
	echo "skip-all-evals=true" >> $GITHUB_OUTPUT
	emit_categories
	exit 0
	fi

	# Check for skip-regression-evals label
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'skip-regression-evals') }}" == "true" ]]; then
	echo "skip-regression-evals label found - regression evals will be skipped"
	else
	echo "Regression evals will run by default"
	add_category "regression"
	fi

	# Check for specific labels
	echo "skip-all-evals=false" >> $GITHUB_OUTPUT
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'combination') }}" == "true" ]]; then
	add_category "combination"
	fi
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'extract') }}" == "true" ]]; then
	add_category "extract"
	fi
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'act') }}" == "true" ]]; then
	add_category "act"
	fi
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'observe') }}" == "true" ]]; then
	add_category "observe"
	fi
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'targeted-extract') }}" == "true" ]]; then
	add_category "targeted_extract"
	fi
	if [[ "${{ contains(github.event.pull_request.labels.*.name, 'agent') }}" == "true" ]]; then
	add_category "agent"
	fi
	emit_categories

	run-lint:
	name: Lint
	runs-on: ubuntu-latest
	needs: [run-build]
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"
	node-version: 20.x

	- name: Run Lint
	run: pnpm exec turbo run lint

	cancel-after-lint-failure:
	name: Cancel after lint failure
	runs-on: ubuntu-latest
	needs: [run-lint]
	if: ${{ always() && needs.run-lint.result == 'failure' }}
	continue-on-error: true
	steps:
	- name: Cancel workflow run
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	run: \|
	curl -sSfL -X POST \
	-H "Authorization: Bearer ${GITHUB_TOKEN}" \
	-H "Accept: application/vnd.github+json" \
	-H "X-GitHub-Api-Version: 2022-11-28" \
	"https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/cancel"

	run-build:
	name: Build
	runs-on: ubuntu-latest
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "false"
	node-version: 20.x

	- name: Run Build
	run: pnpm exec turbo run build

	- name: Save Turbo cache
	if: always()
	uses: actions/cache/save@v4
	with:
	path: .turbo
	key: ${{ runner.os }}-turbo-${{ hashFiles('pnpm-lock.yaml', 'pnpm-workspace.yaml', 'package.json', 'turbo.json') }}-${{ github.sha }}

	- name: Upload build artifacts
	uses: actions/upload-artifact@v4
	with:
	name: build-artifacts
	include-hidden-files: true
	# package.json is included to anchor artifact paths at repo root.
	path: \|
	package.json
	packages/core/dist/**
	packages/core/lib/version.ts
	packages/core/lib/dom/build/**
	packages/core/lib/v3/dom/build/**
	packages/cli/dist/**
	packages/evals/dist/**
	packages/server-v3/dist/**
	packages/server-v3/openapi.v3.yaml
	packages/server-v4/dist/**
	packages/server-v4/openapi.v4.yaml
	retention-days: 1

	run-cli-tests:
	name: CLI Tests
	runs-on: ubuntu-latest
	needs: [run-build, determine-changes]
	if: needs.determine-changes.outputs.cli == 'true'
	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Run CLI Tests
	run: pnpm exec turbo run test:cli --filter=@browserbasehq/browse-cli

	discover-core-tests:
	runs-on: ubuntu-latest
	needs: [determine-changes]
	if: needs.determine-changes.outputs.core == 'true'
	outputs:
	core-tests: ${{ steps.set-matrix.outputs.core-tests }}
	has-core-tests: ${{ steps.set-matrix.outputs.has-core-tests }}

	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "false"
	restore-turbo-cache: "false"

	- name: Discover core test files
	id: set-matrix
	run: \|
	core_json=$(pnpm --filter @browserbasehq/stagehand --silent run test:core -- --list)
	echo "core-tests=$core_json" >> $GITHUB_OUTPUT

	if [ "$core_json" = "[]" ]; then
	echo "has-core-tests=false" >> $GITHUB_OUTPUT
	else
	echo "has-core-tests=true" >> $GITHUB_OUTPUT
	fi

	echo "Found core tests: $core_json"

	core-unit-tests:
	name: core/${{ matrix.test.name }}
	runs-on: ubuntu-latest
	needs: [run-build, discover-core-tests]
	if: needs.discover-core-tests.outputs.has-core-tests == 'true'
	env:
	STAGEHAND_BROWSER_TARGET: local
	STAGEHAND_SERVER_TARGET: local

	strategy:
	fail-fast: false
	max-parallel: 100
	matrix:
	test: ${{ fromJson(needs.discover-core-tests.outputs.core-tests) }}

	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Run Vitest - ${{ matrix.test.name }}
	run: \|
	pnpm exec turbo run test:core --only --filter=@browserbasehq/stagehand -- "${{ matrix.test.path }}"

	- uses: ./.github/actions/upload-ctrf-report
	if: always()
	with:
	name: ctrf/core-unit/${{ matrix.test.name }}.json

	- uses: ./.github/actions/upload-v8-coverage
	if: always()
	with:
	name: coverage/core-unit/${{ matrix.test.name }}

	discover-server-tests:
	runs-on: ubuntu-latest
	needs: [determine-changes]
	if: needs.determine-changes.outputs.server == 'true'
	outputs:
	integration-tests: ${{ steps.set-matrix.outputs.integration-tests }}
	has-integration-tests: ${{ steps.set-matrix.outputs.has-integration-tests }}

	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "false"
	restore-turbo-cache: "false"

	- name: Discover server test files
	id: set-matrix
	run: \|
	int_json=$(pnpm --filter @browserbasehq/stagehand-server-v3 --silent run test:server -- --list integration)
	echo "integration-tests=$int_json" >> $GITHUB_OUTPUT

	if [ "$int_json" = "[]" ]; then
	echo "has-integration-tests=false" >> $GITHUB_OUTPUT
	else
	echo "has-integration-tests=true" >> $GITHUB_OUTPUT
	fi

	echo "Found server integration tests: $int_json"

	build-server-sea:
	name: Build SEA binary (tests, v3)
	uses: ./.github/workflows/stagehand-server-v3-sea-build.yml
	needs: [run-build]
	with:
	matrix: \|
	[
	{"os":"ubuntu-latest","platform":"linux","arch":"x64","binary_name":"stagehand-server-v3-linux-x64","include_sourcemaps":false},
	{"os":"ubuntu-24.04-arm","platform":"linux","arch":"arm64","binary_name":"stagehand-server-v3-linux-arm64","include_sourcemaps":false},
	{"os":"macos-15","platform":"darwin","arch":"arm64","binary_name":"stagehand-server-v3-darwin-arm64","include_sourcemaps":false},
	{"os":"macos-15-intel","platform":"darwin","arch":"x64","binary_name":"stagehand-server-v3-darwin-x64","include_sourcemaps":false},
	{"os":"windows-latest","platform":"win32","arch":"x64","binary_name":"stagehand-server-v3-win32-x64.exe","include_sourcemaps":false},
	{"os":"windows-11-arm","platform":"win32","arch":"arm64","binary_name":"stagehand-server-v3-win32-arm64.exe","include_sourcemaps":false},
	{"os":"ubuntu-latest","platform":"linux","arch":"x64","binary_name":"stagehand-server-v3-linux-x64-sourcemap","include_sourcemaps":true}
	]
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"
	node-version: "20.x"
	upload-only-binary: stagehand-server-v3-linux-x64-sourcemap

	server-integration-tests:
	name: server/v3/integration/${{ matrix.test.name }}
	runs-on: ubuntu-latest
	needs: [build-server-sea, discover-server-tests, run-build]
	if: needs.discover-server-tests.outputs.has-integration-tests == 'true'

	strategy:
	fail-fast: false
	matrix:
	test: ${{ fromJson(needs.discover-server-tests.outputs.integration-tests) }}

	env:
	BB_ENV: local
	STAGEHAND_BASE_URL: http://stagehand-api.localhost:3106
	STAGEHAND_BROWSER_TARGET: local
	STAGEHAND_SERVER_TARGET: sea
	OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
	ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	# Used only for testing /start with env: BROWSERBASE remote browser
	BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
	BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}

	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Download SEA binary
	uses: actions/download-artifact@v4
	with:
	name: stagehand-server-v3-linux-x64-sourcemap
	path: .

	- name: Ensure SEA binary is present and executable
	shell: bash
	run: \|
	set -euo pipefail
	test -f packages/server-v3/dist/sea/stagehand-server-v3-linux-x64-sourcemap
	chmod +x packages/server-v3/dist/sea/stagehand-server-v3-linux-x64-sourcemap

	- name: Run server integration test - ${{ matrix.test.name }}
	env:
	SEA_BINARY_NAME: stagehand-server-v3-linux-x64-sourcemap
	run: \|
	pnpm exec turbo run test:server --only --filter=@browserbasehq/stagehand-server-v3 -- "${{ matrix.test.path }}"

	- uses: ./.github/actions/upload-ctrf-report
	if: always()
	with:
	name: ctrf/server-v3-integration/${{ matrix.test.name }}.json

	- uses: ./.github/actions/upload-v8-coverage
	if: always()
	with:
	name: coverage/server-v3-integration/${{ matrix.test.name }}

	discover-e2e-tests:
	runs-on: ubuntu-latest
	needs: [determine-changes]
	if: needs.determine-changes.outputs.core == 'true'
	outputs:
	e2e-tests: ${{ steps.set-matrix.outputs.e2e-tests }}
	has-e2e-tests: ${{ steps.set-matrix.outputs.has-e2e-tests }}

	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "false"
	restore-turbo-cache: "false"

	- name: Discover e2e test files
	id: set-matrix
	run: \|
	e2e_json=$(pnpm --filter @browserbasehq/stagehand --silent run test:e2e -- --list)
	echo "e2e-tests=$e2e_json" >> $GITHUB_OUTPUT

	if [ "$e2e_json" = "[]" ]; then
	echo "has-e2e-tests=false" >> $GITHUB_OUTPUT
	else
	echo "has-e2e-tests=true" >> $GITHUB_OUTPUT
	fi

	echo "Found e2e tests: $e2e_json"

	run-e2e-local-tests:
	name: e2e/local/${{ matrix.test.name }}
	needs: [run-build, discover-e2e-tests]
	runs-on: ubuntu-latest
	timeout-minutes: 50
	if: >
	needs.discover-e2e-tests.outputs.has-e2e-tests == 'true' &&
	github.event.pull_request.head.repo.full_name == github.repository
	env:
	OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
	BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
	BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
	HEADLESS: true
	STAGEHAND_BROWSER_TARGET: local
	STAGEHAND_SERVER_TARGET: local
	strategy:
	fail-fast: false
	max-parallel: 20
	matrix:
	test: ${{ fromJson(needs.discover-e2e-tests.outputs.e2e-tests) }}
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- uses: ./.github/actions/verify-chromium-launch

	- name: Run local E2E Tests - ${{ matrix.test.name }}
	run: \|
	pnpm exec turbo run test:e2e --only --filter=@browserbasehq/stagehand -- "${{ matrix.test.path }}"

	- uses: ./.github/actions/upload-ctrf-report
	if: always()
	with:
	name: ctrf/e2e-local/${{ matrix.test.name }}.json

	- uses: ./.github/actions/upload-v8-coverage
	if: always()
	with:
	name: coverage/e2e-local/${{ matrix.test.name }}

	run-e2e-bb-tests:
	name: e2e/bb/${{ matrix.test.name }}
	needs: [run-build, discover-e2e-tests]
	runs-on: ubuntu-latest
	timeout-minutes: 50
	if: >
	needs.discover-e2e-tests.outputs.has-e2e-tests == 'true' &&
	github.event.pull_request.head.repo.full_name == github.repository
	env:
	OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
	BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
	BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
	HEADLESS: true
	STAGEHAND_BROWSER_TARGET: browserbase
	STAGEHAND_SERVER_TARGET: local
	strategy:
	fail-fast: false
	max-parallel: 100
	matrix:
	test: ${{ fromJson(needs.discover-e2e-tests.outputs.e2e-tests) }}
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Select Browserbase region
	uses: ./.github/actions/select-browserbase-region
	with:
	distribution: ${{ env.BROWSERBASE_REGION_DISTRIBUTION }}

	- name: Run E2E Tests (browserbase) - ${{ matrix.test.name }}
	run: \|
	pnpm exec turbo run test:e2e --only --filter=@browserbasehq/stagehand -- "${{ matrix.test.path }}"

	- uses: ./.github/actions/upload-ctrf-report
	if: always()
	with:
	name: ctrf/e2e-bb/${{ matrix.test.name }}.json

	- uses: ./.github/actions/upload-v8-coverage
	if: always()
	with:
	name: coverage/e2e-bb/${{ matrix.test.name }}

	run-evals:
	name: evals/${{ matrix.category }}
	needs: [run-build, determine-evals, run-e2e-bb-tests]
	if: >-
	${{
	always() &&
	needs.run-build.result == 'success' &&
	needs.determine-evals.result == 'success' &&
	needs.run-e2e-bb-tests.result != 'failure' &&
	needs.run-e2e-bb-tests.result != 'cancelled' &&
	needs.determine-evals.outputs.skip-all-evals != 'true' &&
	needs.determine-evals.outputs.eval-categories != '[]'
	}}
	runs-on: ubuntu-latest
	timeout-minutes: 90
	strategy:
	fail-fast: false
	matrix:
	category: ${{ fromJson(needs.determine-evals.outputs.eval-categories) }}
	env:
	OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }}
	BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
	BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
	BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
	STAGEHAND_BROWSER_TARGET: browserbase
	STAGEHAND_SERVER_TARGET: local
	steps:
	- name: Check out repository code
	uses: actions/checkout@v4

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Select Browserbase region
	uses: ./.github/actions/select-browserbase-region
	with:
	distribution: ${{ env.BROWSERBASE_REGION_DISTRIBUTION }}

	- name: Run Evals - ${{ matrix.category }}
	id: run-evals
	env:
	NODE_V8_COVERAGE: coverage/evals/${{ matrix.category }}
	run: \|
	log_file="$(mktemp)"
	set +e
	pnpm exec turbo run test:evals --only --filter=@browserbasehq/stagehand-evals -- "${{ matrix.category }}" -t "${EVAL_TRIAL_COUNT}" -c "${EVAL_MAX_CONCURRENCY}" 2>&1 \| tee "$log_file"
	eval_status=${PIPESTATUS[0]}
	set -e

	summary_block="$(
	awk '
	/^=========================SUMMARY=========================$/ { capture=1 }
	capture { print }
	/^Evaluation summary written to / { capture=0 }
	' "$log_file"
	)"

	if [ -n "$summary_block" ]; then
	{
	echo "summary_text<<EOF"
	echo "$summary_block"
	echo "EOF"
	} >> "$GITHUB_OUTPUT"
	fi

	exit "$eval_status"

	- name: Log Evals Performance - ${{ matrix.category }}
	env:
	EVAL_STDOUT_SUMMARY: ${{ steps.run-evals.outputs.summary_text }}
	run: \|
	if [ -n "${EVAL_STDOUT_SUMMARY:-}" ]; then
	echo "### Evals Summary (${{ matrix.category }})" >> "$GITHUB_STEP_SUMMARY"
	echo '```' >> "$GITHUB_STEP_SUMMARY"
	printf '%s\n' "$EVAL_STDOUT_SUMMARY" >> "$GITHUB_STEP_SUMMARY"
	echo '```' >> "$GITHUB_STEP_SUMMARY"
	fi
	experimentName=$(jq -r '.experimentName' eval-summary.json)
	echo "View results at https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentName}"
	if [ -f eval-summary.json ]; then
	category_score=$(jq ".categories[\"${{ matrix.category }}\"]" eval-summary.json)
	echo "${{ matrix.category }} category score: $category_score%"
	if (( $(echo "$category_score < 80" \| bc -l) )); then
	echo "${{ matrix.category }} category score is below 80%. Failing CI."
	exit 1
	fi
	else
	echo "Eval summary not found for ${{ matrix.category }} category. Failing CI."
	exit 1
	fi

	- uses: ./.github/actions/upload-ctrf-report
	if: always()
	with:
	name: ctrf/evals/${{ matrix.category }}.json

	- uses: ./.github/actions/upload-v8-coverage
	if: always()
	with:
	name: coverage/evals/${{ matrix.category }}

	merge-coverage:
	name: Code Coverage Report
	runs-on: ubuntu-latest
	needs:
	- core-unit-tests
	- run-e2e-local-tests
	- run-e2e-bb-tests
	- run-evals
	- server-integration-tests
	# if: always()
	if: false
	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- uses: ./.github/actions/setup-node-pnpm-turbo
	with:
	use-prebuilt-artifacts: "true"
	restore-turbo-cache: "false"

	- name: Download V8 coverage artifacts
	uses: actions/download-artifact@v4
	continue-on-error: true
	with:
	pattern: coverage-*
	path: .
	merge-multiple: true

	- name: Download CTRF artifacts
	uses: actions/download-artifact@v4
	continue-on-error: true
	with:
	pattern: ctrf-*
	path: .
	merge-multiple: true

	- name: Generate merged coverage report
	run: \|
	pnpm run coverage:merge

	- name: Upload merged coverage report
	if: always()
	id: upload-coverage-artifact
	uses: actions/upload-artifact@v4
	with:
	name: coverage-merged
	# package.json is included to anchor artifact paths at repo root.
	path: \|
	package.json
	coverage/merged

	- name: Add coverage summary to job summary
	if: always()
	shell: bash
	run: \|
	echo "### Code Coverage" >> "$GITHUB_STEP_SUMMARY"
	echo "" >> "$GITHUB_STEP_SUMMARY"
	if [ -f coverage/merged/coverage-summary.txt ]; then
	echo '```' >> "$GITHUB_STEP_SUMMARY"
	cat coverage/merged/coverage-summary.txt >> "$GITHUB_STEP_SUMMARY"
	echo '```' >> "$GITHUB_STEP_SUMMARY"
	else
	echo "Coverage summary not available." >> "$GITHUB_STEP_SUMMARY"
	fi
	if [ -n "${{ steps.upload-coverage-artifact.outputs.artifact-url }}" ]; then
	echo "" >> "$GITHUB_STEP_SUMMARY"
	echo "[Download full HTML coverage report](${{ steps.upload-coverage-artifact.outputs.artifact-url }})" >> "$GITHUB_STEP_SUMMARY"
	fi

	- name: Publish merged CTRF report
	if: always()
	uses: ctrf-io/github-test-reporter@v1
	with:
	report-path: './ctrf/*/.json'
	summary: true
	summary-report: false
	summary-delta-report: true
	test-report: false
	failed-report: false
	insights-report: true
	flaky-rate-report: true
	fail-rate-report: true
	slowest-report: true
	previous-results-report: true
	fetch-previous-results: true
	baseline: 1
	previous-results-max: 1
	max-workflow-runs-to-check: 5
	max-previous-runs-to-fetch: 1
	upload-artifact: true
	artifact-name: ctrf-report-merged
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

	- name: Compute coverage status metrics
	if: always()
	id: coverage-status
	shell: bash
	run: \|
	set -euo pipefail
	shopt -s globstar nullglob
	tests_failed=0
	ctrf_files=(ctrf/*/.json)
	if [ "${#ctrf_files[@]}" -gt 0 ]; then
	tests_failed=$(jq -s '[.[].results.summary.failed // 0] \| add' "${ctrf_files[@]}")
	fi
	total_coverage=0
	if [ -f coverage/merged/coverage-summary.txt ]; then
	total_coverage=$(awk '/^Lines/ {gsub(/%/,"",$3); print $3}' coverage/merged/coverage-summary.txt)
	fi
	echo "tests_failed=${tests_failed}" >> "$GITHUB_OUTPUT"
	echo "total_coverage=${total_coverage}" >> "$GITHUB_OUTPUT"

	- name: Set coverage status
	if: always()
	continue-on-error: true
	shell: bash
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	RUN_ID: ${{ github.run_id }}
	PULL_NUMBER: ${{ github.event.pull_request.number }}
	TESTS_FAILED: ${{ steps.coverage-status.outputs.tests_failed }}
	TOTAL_COVERAGE: ${{ steps.coverage-status.outputs.total_coverage }}
	run: \|
	set -euo pipefail
	repo="${GITHUB_REPOSITORY}"
	sha="${GITHUB_SHA}"
	tests_failed="${TESTS_FAILED:-0}"
	total_coverage="${TOTAL_COVERAGE:-0}"
	state="success"
	if [ -n "${PULL_NUMBER:-}" ]; then
	target_url="https://github.com/${repo}/pull/${PULL_NUMBER}/checks?check_run_id=${RUN_ID}"
	else
	target_url="https://github.com/${repo}/actions/runs/${RUN_ID}"
	fi
	description="non-blocking report: ${tests_failed} tests failed. ${total_coverage}% coverage"
	payload=$(jq -n \
	--arg state "$state" \
	--arg target_url "$target_url" \
	--arg description "$description" \
	--arg context "Measured coverage" \
	'{state: $state, target_url: $target_url, description: $description, context: $context}')
	curl -sSfL -X POST \
	-H "Authorization: Bearer ${GITHUB_TOKEN}" \
	-H "Accept: application/vnd.github+json" \
	-H "X-GitHub-Api-Version: 2022-11-28" \
	"https://api.github.com/repos/${repo}/statuses/${sha}" \
	-d "$payload"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Bump fastify from 5.6.2 to 5.8.3 #73

Workflow file

Bump fastify from 5.6.2 to 5.8.3 #73

Uh oh!

Workflow file for this run