Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8971646
refactor(reexecute): export NewMainnetCChainVM()
RodrigoVillar Dec 17, 2025
4ec0964
test(reexecute): add firewood chaos test
RodrigoVillar Dec 15, 2025
5c7ec5c
chore: exponential => linear
RodrigoVillar Dec 16, 2025
9a893da
chore: add timeout when waiting for killed process to terminate
RodrigoVillar Dec 16, 2025
4e1b5ac
chore: ctx
RodrigoVillar Dec 17, 2025
7b6d057
chore: rebase nits
RodrigoVillar Dec 17, 2025
c8e8582
chore: remove unnecessary diff
RodrigoVillar Dec 17, 2025
506a2e3
chore: remove deps.go
RodrigoVillar Dec 17, 2025
004bc43
Merge branch 'master' into rodrigo/firewood-chaos-test
RodrigoVillar Dec 17, 2025
98273a0
chore: rename task
RodrigoVillar Dec 17, 2025
d989910
chore: nit
RodrigoVillar Dec 17, 2025
fd0d1e0
Merge branch 'master' into rodrigo/firewood-chaos-test
RodrigoVillar Jan 6, 2026
00a4852
refactor!: replace task logic with script
RodrigoVillar Jan 6, 2026
8b17166
chore: add archival test
RodrigoVillar Jan 6, 2026
8a239e6
fix: EOF
RodrigoVillar Jan 6, 2026
c84015d
fix: config
RodrigoVillar Jan 6, 2026
9b24d42
refactor: remove JSON
RodrigoVillar Jan 6, 2026
f1c7cf0
fix: config (again)
RodrigoVillar Jan 6, 2026
ef82e6a
chore: rename chaos test yml
RodrigoVillar Jan 6, 2026
c793199
refactor!: merge chaos script into reexecution script"
RodrigoVillar Jan 6, 2026
439dd59
Merge branch 'master' into rodrigo/firewood-chaos-test
RodrigoVillar Jan 6, 2026
b472805
chore: nits
RodrigoVillar Jan 6, 2026
219b24a
refactor!: address PR comments
RodrigoVillar Jan 6, 2026
59581a8
chore: defined tests set chaos defaults
RodrigoVillar Jan 6, 2026
c29a101
refactor: simplify chaos test validation conditional
RodrigoVillar Jan 6, 2026
ada0690
doc: make clear that chaos test works only with Firewood
RodrigoVillar Jan 6, 2026
6464738
Merge branch 'master' into rodrigo/firewood-chaos-test
RodrigoVillar Jan 6, 2026
4740b85
chore: license
RodrigoVillar Jan 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions .github/workflows/firewood-chaos-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
name: Firewood Chaos Test

on:
  # Manual runs: either pick a predefined test by name, or leave `test` empty
  # and describe a custom run with the remaining inputs.
  workflow_dispatch:
    inputs:
      test:
        description: 'Test name to run (e.g., chaos-101-250k). Leave empty to use custom inputs below.'
        default: ''
      # Custom inputs (used when test is not provided)
      start-block:
        description: 'The start block for the benchmark.'
        default: ''
      end-block:
        description: 'The end block for the benchmark.'
        default: ''
      block-dir-src:
        description: 'The source block directory. Supports S3 directory/zip and local directories.'
        default: ''
      current-state-dir-src:
        description: 'The current state directory. Supports S3 directory/zip and local directories.'
        default: ''
      # Chaos test specific inputs
      config:
        description: 'VM config preset (firewood, firewood-archive). Required for custom tests.'
        default: 'firewood'
      # The process is killed at some point between min-wait-time and
      # max-wait-time.
      min-wait-time:
        description: 'Minimum wait time before killing the process (e.g., 120s, 2m).'
        default: '120s'
      max-wait-time:
        description: 'Maximum wait time before killing the process (e.g., 150s, 3m).'
        default: '150s'
      # The only input without a default; it must be supplied on every
      # manual run.
      runner:
        description: 'Runner to execute the chaos test. Input to the runs-on field of the job.'
        required: true
      timeout-minutes:
        description: 'Timeout in minutes for the job.'
        default: '60'
  # XXX: remove this before merging
  pull_request:
Comment on lines +38 to +39
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for testing the PR - will remove prior to merging.

# Scheduled runs carry no dispatch inputs, so the define-matrix job falls back
# to its predefined list of chaos tests.
schedule:
- cron: '0 9 * * *' # Runs every day at 09:00 UTC (04:00 EST)

jobs:
  # Produces the JSON matrix consumed by firewood-chaos-test: a single entry
  # built from the dispatch inputs for manual runs, otherwise the two
  # predefined chaos tests.
  define-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.define-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
      - name: Define Matrix
        id: define-matrix
        shell: bash -x {0}
        # Inputs are handed to the script through the environment instead of
        # being expanded into the script text. Expanding `${{ ... }}` inside
        # `run:` lets an input containing quotes or `$(...)` alter the script
        # itself (script injection).
        env:
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_TEST: ${{ github.event.inputs.test }}
          DISPATCH_START_BLOCK: ${{ github.event.inputs.start-block }}
          DISPATCH_END_BLOCK: ${{ github.event.inputs.end-block }}
          DISPATCH_BLOCK_DIR_SRC: ${{ github.event.inputs.block-dir-src }}
          DISPATCH_CURRENT_STATE_DIR_SRC: ${{ github.event.inputs.current-state-dir-src }}
          DISPATCH_CONFIG: ${{ github.event.inputs.config }}
          DISPATCH_MIN_WAIT_TIME: ${{ github.event.inputs.min-wait-time }}
          DISPATCH_MAX_WAIT_TIME: ${{ github.event.inputs.max-wait-time }}
          DISPATCH_RUNNER: ${{ github.event.inputs.runner }}
          DISPATCH_TIMEOUT_MINUTES: ${{ github.event.inputs.timeout-minutes }}
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            {
              echo "matrix<<EOF"
              # Values are inserted verbatim (no JSON escaping), so they must
              # not contain double quotes or backslashes. timeout-minutes is
              # emitted as a bare JSON number and falls back to 60 when empty
              # so the document stays valid.
              printf '{ "include": [{ "test": "%s", "start-block": "%s", "end-block": "%s", "block-dir-src": "%s", "current-state-dir-src": "%s", "config": "%s", "min-wait-time": "%s", "max-wait-time": "%s", "runner": "%s", "timeout-minutes": %s }] }\n' \
                "$DISPATCH_TEST" \
                "$DISPATCH_START_BLOCK" \
                "$DISPATCH_END_BLOCK" \
                "$DISPATCH_BLOCK_DIR_SRC" \
                "$DISPATCH_CURRENT_STATE_DIR_SRC" \
                "$DISPATCH_CONFIG" \
                "$DISPATCH_MIN_WAIT_TIME" \
                "$DISPATCH_MAX_WAIT_TIME" \
                "$DISPATCH_RUNNER" \
                "${DISPATCH_TIMEOUT_MINUTES:-60}"
              echo EOF
            } >> "$GITHUB_OUTPUT"
          else
            {
              echo "matrix<<EOF"
              echo '{ "include": [{ "test": "chaos-101-250k" }, { "test": "chaos-archive-101-250k" }] }'
              echo EOF
            } >> "$GITHUB_OUTPUT"
          fi

  # Runs one chaos test per matrix entry. Entries from the predefined list
  # only carry `test`, hence the fallbacks for runner and timeout below.
  firewood-chaos-test:
    needs: define-matrix
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix) }}
    timeout-minutes: ${{ matrix.timeout-minutes || 60 }}
    runs-on: ${{ matrix.runner || 'ubuntu-latest' }}
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f #v31
        with:
          github_access_token: ${{ secrets.GITHUB_TOKEN }}
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_S3_READ_ONLY_ROLE }}
          aws-region: 'us-east-2'
          role-duration-seconds: '43200'
      - uses: actions/checkout@v4
      - name: Run chaos test with Firewood
        shell: nix develop --impure --command bash -x {0}
        # The test name is passed through MATRIX_TEST rather than expanded
        # into the command line, for the same script-injection reason as in
        # define-matrix.
        run: ./scripts/run_task.sh test-cchain-reexecution -- "$MATRIX_TEST"
        env:
          MATRIX_TEST: ${{ matrix.test || '' }}
          CHAOS_MODE: 'true'
          START_BLOCK: ${{ matrix.start-block }}
          END_BLOCK: ${{ matrix.end-block }}
          BLOCK_DIR_SRC: ${{ matrix.block-dir-src }}
          CURRENT_STATE_DIR_SRC: ${{ matrix.current-state-dir-src }}
          CONFIG: ${{ matrix.config }}
          MIN_WAIT_TIME: ${{ matrix.min-wait-time }}
          MAX_WAIT_TIME: ${{ matrix.max-wait-time }}

117 changes: 93 additions & 24 deletions scripts/benchmark_cchain_range.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

set -euo pipefail

# C-Chain Re-execution Benchmark Script
# C-Chain Re-execution Benchmark and Chaos Test Script
#
# Usage:
# ./benchmark_cchain_range.sh [test-name]
#
# Test names starting with "chaos-" run crash tests.
# All other test names run reexecution tests.
#
# To see available tests: use `help` as the test name or invoke
# without a test name and without required env vars.
#
Expand All @@ -24,14 +27,20 @@ set -euo pipefail
# START_BLOCK: The starting block height (inclusive).
# END_BLOCK: The ending block height (inclusive).
#
# Optional:
# CONFIG: VM config preset (default, archive, firewood).
# Optional (reexecution tests):
# CONFIG: VM config preset (default, archive, firewood, firewood-archive).
# LABELS: Comma-separated key=value pairs for metric labels.
# BENCHMARK_OUTPUT_FILE: If set, benchmark output is also written to this file.
# METRICS_SERVER_ENABLED: If set, enables the metrics server.
# METRICS_SERVER_PORT: If set, determines the port the metrics server will listen to.
# METRICS_COLLECTOR_ENABLED: If set, enables the metrics collector.
# PUSH_POST_STATE: S3 destination to push current-state after execution.
#
# Required (chaos tests with custom parameters; chaos-* test names enable chaos mode and supply defaults):
# CHAOS_MODE: Set to "true" to run a chaos test without a chaos-* test name.
# CONFIG: VM config preset (firewood, firewood-archive).
# MIN_WAIT_TIME: Minimum wait before crash (e.g., 120s).
# MAX_WAIT_TIME: Maximum wait before crash (e.g., 150s).

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

Expand All @@ -51,13 +60,19 @@ Usage: $0 [test-name]

Available tests:
help - Show this help message

Reexecution tests:
default - Quick test run (blocks 101-200, hashdb)
hashdb-101-250k - Blocks 101-250k with hashdb
hashdb-archive-101-250k - Blocks 101-250k with hashdb archive
hashdb-33m-33m500k - Blocks 33m-33.5m with hashdb
firewood-101-250k - Blocks 101-250k with firewood
firewood-33m-33m500k - Blocks 33m-33.5m with firewood
firewood-33m-40m - Blocks 33m-40m with firewood

Chaos tests:
chaos-101-250k - Blocks 101-250k with Firewood chaos test
chaos-archive-101-250k - Blocks 101-250k with Firewood archive chaos test
EOF
}

Expand Down Expand Up @@ -116,12 +131,36 @@ if [[ -n "$TEST_NAME" ]]; then
END_BLOCK="${END_BLOCK:-40000000}"
CONFIG="${CONFIG:-firewood}"
;;
chaos-101-250k)
BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}"
CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-100}"
START_BLOCK="${START_BLOCK:-101}"
END_BLOCK="${END_BLOCK:-250000}"
MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}"
MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}"
CONFIG="${CONFIG:-firewood}"
;;
chaos-archive-101-250k)
BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}"
CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}"
START_BLOCK="${START_BLOCK:-101}"
END_BLOCK="${END_BLOCK:-250000}"
MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}"
MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}"
CONFIG="${CONFIG:-firewood-archive}"
;;
*)
error "Unknown test '$TEST_NAME'"
;;
esac
fi

# A run is a chaos test when the test name carries the "chaos-" prefix or
# when CHAOS_MODE is explicitly set to "true".
case "${TEST_NAME:-}" in
  chaos-*) IS_CHAOS_TEST=true ;;
  *) IS_CHAOS_TEST=false ;;
esac
if [[ "${CHAOS_MODE:-}" == "true" ]]; then
  IS_CHAOS_TEST=true
fi

# Determine data source: S3 import or local paths
if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then
# S3 mode - import data
Expand Down Expand Up @@ -150,6 +189,9 @@ elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then
echo " Block range:"
[[ -n "${START_BLOCK:-}" ]] && echo " START_BLOCK: ${START_BLOCK}" || echo " START_BLOCK: (not set)"
[[ -n "${END_BLOCK:-}" ]] && echo " END_BLOCK: ${END_BLOCK}" || echo " END_BLOCK: (not set)"
echo " Timeouts (chaos tests):"
[[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)"
[[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)"
exit 1
fi

Expand All @@ -158,25 +200,52 @@ if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then
error "START_BLOCK and END_BLOCK are required"
fi

echo "=== C-Chain Re-execution: ${TEST_NAME:-custom} ==="
echo "Blocks: ${START_BLOCK} - ${END_BLOCK}"
echo "Config: ${CONFIG:-default}"

echo "=== Running re-execution ==="
go run github.com/ava-labs/avalanchego/tests/reexecute/c \
--block-dir="${BLOCK_DIR}" \
--current-state-dir="${CURRENT_STATE_DIR}" \
${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \
${CONFIG:+--config="${CONFIG}"} \
--start-block="${START_BLOCK}" \
--end-block="${END_BLOCK}" \
${LABELS:+--labels="${LABELS}"} \
${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \
${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \
${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \
${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"}

if [[ -n "${PUSH_POST_STATE:-}" ]]; then
echo "=== Pushing post-state to S3 ==="
"${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}"
# Chaos tests require additional validation: unlike re-execution, the chaos
# runner is always given --config and both wait-time bounds.
if [[ "$IS_CHAOS_TEST" == "true" ]]; then
  if [[ -z "${CONFIG:-}" ]]; then
    error "CONFIG is required for chaos tests"
  fi
  if [[ -z "${MIN_WAIT_TIME:-}" || -z "${MAX_WAIT_TIME:-}" ]]; then
    error "MIN_WAIT_TIME and MAX_WAIT_TIME are required for chaos tests"
  fi
fi

if [[ "$IS_CHAOS_TEST" == "true" ]]; then
  echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ==="
  echo "Blocks: ${START_BLOCK} - ${END_BLOCK}"
  echo "CONFIG: ${CONFIG}"
  echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}"

  echo "=== Running Chaos Test ==="
  # Use the module import path (as the re-execution branch below does) rather
  # than a cwd-relative ./tests/... path, so the chaos runner also resolves
  # when this script is invoked from a subdirectory such as scripts/.
  go run github.com/ava-labs/avalanchego/tests/reexecute/chaos \
    --start-block="${START_BLOCK}" \
    --end-block="${END_BLOCK}" \
    --current-state-dir="${CURRENT_STATE_DIR}" \
    --block-dir="${BLOCK_DIR}" \
    --min-wait-time="${MIN_WAIT_TIME}" \
    --max-wait-time="${MAX_WAIT_TIME}" \
    --config="${CONFIG}"
else
  echo "=== C-Chain Re-execution: ${TEST_NAME:-custom} ==="
  echo "Blocks: ${START_BLOCK} - ${END_BLOCK}"
  echo "Config: ${CONFIG:-default}"

  echo "=== Running re-execution ==="
  # Optional flags use ${VAR:+...} so each is passed only when its variable
  # is set and non-empty; the inner quotes keep the value a single word.
  go run github.com/ava-labs/avalanchego/tests/reexecute/c \
    --block-dir="${BLOCK_DIR}" \
    --current-state-dir="${CURRENT_STATE_DIR}" \
    ${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \
    ${CONFIG:+--config="${CONFIG}"} \
    --start-block="${START_BLOCK}" \
    --end-block="${END_BLOCK}" \
    ${LABELS:+--labels="${LABELS}"} \
    ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \
    ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \
    ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \
    ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"}

  # Only re-execution runs push the resulting state; the copy is skipped
  # unless a destination is configured.
  if [[ -n "${PUSH_POST_STATE:-}" ]]; then
    echo "=== Pushing post-state to S3 ==="
    "${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}"
  fi
fi
Loading