From 8971646585912ccdf22601bb29751a9c1f6670e6 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 10:14:28 -0500 Subject: [PATCH 01/24] refactor(reexecute): export NewMainnetCChainVM() --- tests/reexecute/c/vm_reexecute.go | 103 +------------------------- tests/reexecute/vm.go | 117 ++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 102 deletions(-) create mode 100644 tests/reexecute/vm.go diff --git a/tests/reexecute/c/vm_reexecute.go b/tests/reexecute/c/vm_reexecute.go index 586b4c6a2116..5155861dacf1 100644 --- a/tests/reexecute/c/vm_reexecute.go +++ b/tests/reexecute/c/vm_reexecute.go @@ -22,35 +22,15 @@ import ( "go.uber.org/zap" "github.com/ava-labs/avalanchego/api/metrics" - "github.com/ava-labs/avalanchego/chains/atomic" - "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/database/leveldb" - "github.com/ava-labs/avalanchego/database/prefixdb" - "github.com/ava-labs/avalanchego/genesis" "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm" - "github.com/ava-labs/avalanchego/graft/coreth/plugin/factory" - "github.com/ava-labs/avalanchego/ids" - "github.com/ava-labs/avalanchego/snow" - "github.com/ava-labs/avalanchego/snow/engine/enginetest" "github.com/ava-labs/avalanchego/snow/engine/snowman/block" - "github.com/ava-labs/avalanchego/snow/validators/validatorstest" "github.com/ava-labs/avalanchego/tests" "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" "github.com/ava-labs/avalanchego/tests/reexecute" - "github.com/ava-labs/avalanchego/upgrade" - "github.com/ava-labs/avalanchego/utils/constants" - "github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner" "github.com/ava-labs/avalanchego/utils/logging" "github.com/ava-labs/avalanchego/utils/perms" "github.com/ava-labs/avalanchego/utils/timer" - "github.com/ava-labs/avalanchego/vms/metervm" - "github.com/ava-labs/avalanchego/vms/platformvm/warp" -) - -var ( - mainnetXChainID = 
ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") - mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") - mainnetAvaxAssetID = ids.FromStringOrPanic("FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z") ) var ( @@ -252,7 +232,7 @@ func benchmarkReexecuteRange( r.NoError(db.Close()) }() - vm, err := newMainnetCChainVM( + vm, err := reexecute.NewMainnetCChainVM( ctx, db, chainDataDir, @@ -289,87 +269,6 @@ func benchmarkReexecuteRange( } } -func newMainnetCChainVM( - ctx context.Context, - vmAndSharedMemoryDB database.Database, - chainDataDir string, - configBytes []byte, - vmMultiGatherer metrics.MultiGatherer, - meterVMRegistry prometheus.Registerer, -) (block.ChainVM, error) { - factory := factory.Factory{} - vmIntf, err := factory.New(logging.NoLog{}) - if err != nil { - return nil, fmt.Errorf("failed to create VM from factory: %w", err) - } - vm := vmIntf.(block.ChainVM) - - blsKey, err := localsigner.New() - if err != nil { - return nil, fmt.Errorf("failed to create BLS key: %w", err) - } - - blsPublicKey := blsKey.PublicKey() - warpSigner := warp.NewSigner(blsKey, constants.MainnetID, mainnetCChainID) - - genesisConfig := genesis.GetConfig(constants.MainnetID) - - sharedMemoryDB := prefixdb.New([]byte("sharedmemory"), vmAndSharedMemoryDB) - atomicMemory := atomic.NewMemory(sharedMemoryDB) - - chainIDToSubnetID := map[ids.ID]ids.ID{ - mainnetXChainID: constants.PrimaryNetworkID, - mainnetCChainID: constants.PrimaryNetworkID, - ids.Empty: constants.PrimaryNetworkID, - } - - vm = metervm.NewBlockVM(vm, meterVMRegistry) - - if err := vm.Initialize( - ctx, - &snow.Context{ - NetworkID: constants.MainnetID, - SubnetID: constants.PrimaryNetworkID, - ChainID: mainnetCChainID, - NodeID: ids.GenerateTestNodeID(), - PublicKey: blsPublicKey, - NetworkUpgrades: upgrade.Mainnet, - - XChainID: mainnetXChainID, - CChainID: mainnetCChainID, - AVAXAssetID: mainnetAvaxAssetID, - - Log: 
tests.NewDefaultLogger("mainnet-vm-reexecution"), - SharedMemory: atomicMemory.NewSharedMemory(mainnetCChainID), - BCLookup: ids.NewAliaser(), - Metrics: vmMultiGatherer, - - WarpSigner: warpSigner, - - ValidatorState: &validatorstest.State{ - GetSubnetIDF: func(_ context.Context, chainID ids.ID) (ids.ID, error) { - subnetID, ok := chainIDToSubnetID[chainID] - if ok { - return subnetID, nil - } - return ids.Empty, fmt.Errorf("unknown chainID: %s", chainID) - }, - }, - ChainDataDir: chainDataDir, - }, - prefixdb.New([]byte("vm"), vmAndSharedMemoryDB), - []byte(genesisConfig.CChainGenesis), - nil, - configBytes, - nil, - &enginetest.Sender{}, - ); err != nil { - return nil, fmt.Errorf("failed to initialize VM: %w", err) - } - - return vm, nil -} - type vmExecutorConfig struct { Log logging.Logger // Registry is the registry to register the metrics with. diff --git a/tests/reexecute/vm.go b/tests/reexecute/vm.go new file mode 100644 index 000000000000..5f2e568ccff0 --- /dev/null +++ b/tests/reexecute/vm.go @@ -0,0 +1,117 @@ +// Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package reexecute + +import ( + "context" + "fmt" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/ava-labs/avalanchego/api/metrics" + "github.com/ava-labs/avalanchego/chains/atomic" + "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/database/prefixdb" + "github.com/ava-labs/avalanchego/genesis" + "github.com/ava-labs/avalanchego/graft/coreth/plugin/factory" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/snow" + "github.com/ava-labs/avalanchego/snow/engine/enginetest" + "github.com/ava-labs/avalanchego/snow/engine/snowman/block" + "github.com/ava-labs/avalanchego/snow/validators/validatorstest" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/upgrade" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner" + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/vms/metervm" + "github.com/ava-labs/avalanchego/vms/platformvm/warp" +) + +var ( + mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") + mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") + mainnetAvaxAssetID = ids.FromStringOrPanic("FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z") +) + +func NewMainnetCChainVM( + ctx context.Context, + vmAndSharedMemoryDB database.Database, + chainDataDir string, + configBytes []byte, + vmMultiGatherer metrics.MultiGatherer, + meterVMRegistry prometheus.Registerer, +) (block.ChainVM, error) { + factory := factory.Factory{} + vmIntf, err := factory.New(logging.NoLog{}) + if err != nil { + return nil, fmt.Errorf("failed to create VM from factory: %w", err) + } + vm := vmIntf.(block.ChainVM) + + blsKey, err := localsigner.New() + if err != nil { + return nil, fmt.Errorf("failed to create BLS key: %w", err) + } + + blsPublicKey := blsKey.PublicKey() + warpSigner := warp.NewSigner(blsKey, 
constants.MainnetID, mainnetCChainID) + + genesisConfig := genesis.GetConfig(constants.MainnetID) + + sharedMemoryDB := prefixdb.New([]byte("sharedmemory"), vmAndSharedMemoryDB) + atomicMemory := atomic.NewMemory(sharedMemoryDB) + + chainIDToSubnetID := map[ids.ID]ids.ID{ + mainnetXChainID: constants.PrimaryNetworkID, + mainnetCChainID: constants.PrimaryNetworkID, + ids.Empty: constants.PrimaryNetworkID, + } + + vm = metervm.NewBlockVM(vm, meterVMRegistry) + + if err := vm.Initialize( + ctx, + &snow.Context{ + NetworkID: constants.MainnetID, + SubnetID: constants.PrimaryNetworkID, + ChainID: mainnetCChainID, + NodeID: ids.GenerateTestNodeID(), + PublicKey: blsPublicKey, + NetworkUpgrades: upgrade.Mainnet, + + XChainID: mainnetXChainID, + CChainID: mainnetCChainID, + AVAXAssetID: mainnetAvaxAssetID, + + Log: tests.NewDefaultLogger("mainnet-vm-reexecution"), + SharedMemory: atomicMemory.NewSharedMemory(mainnetCChainID), + BCLookup: ids.NewAliaser(), + Metrics: vmMultiGatherer, + + WarpSigner: warpSigner, + + ValidatorState: &validatorstest.State{ + GetSubnetIDF: func(_ context.Context, chainID ids.ID) (ids.ID, error) { + subnetID, ok := chainIDToSubnetID[chainID] + if ok { + return subnetID, nil + } + return ids.Empty, fmt.Errorf("unknown chainID: %s", chainID) + }, + }, + ChainDataDir: chainDataDir, + }, + prefixdb.New([]byte("vm"), vmAndSharedMemoryDB), + []byte(genesisConfig.CChainGenesis), + nil, + configBytes, + nil, + &enginetest.Sender{}, + ); err != nil { + return nil, fmt.Errorf("failed to initialize VM: %w", err) + } + + return vm, nil +} From 4ec09642bc74b0589fa3165c8f374cd1ecea41e3 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 15 Dec 2025 13:38:38 -0500 Subject: [PATCH 02/24] test(reexecute): add firewood chaos test ci: add chaos test job chore: nits chore: add nix installation step chore: configure AWS credentials chore: remove inputs chore: add perms chore: nit chore: nit chore: extend wait time chore: nits chore: nit chore: nit chore: Create 
shared `evm` module (#4690) chore(reexecute/c): remove go bench from benchmark (#4640) chore: nits fix: MAX_WAIT_TIME chore: nit chore: extend wait times chore: log errs chore: stdout tail chore: nit chore: nits chore: nits ci: improve workflow chore: nits chore: nits chore: nits chore: lint --- .github/workflows/chaos-test.json | 30 ++++ .github/workflows/chaos-test.yml | 108 +++++++++++++++ Taskfile.yml | 39 ++++++ graft/evm/.golangci.yml | 77 +++++++++++ graft/evm/go.mod | 9 ++ graft/evm/go.sum | 11 ++ tests/reexecute/c/vm_reexecute.go | 9 ++ tests/reexecute/chaos/deps.go | 117 ++++++++++++++++ tests/reexecute/chaos/main.go | 218 ++++++++++++++++++++++++++++++ 9 files changed, 618 insertions(+) create mode 100644 .github/workflows/chaos-test.json create mode 100644 .github/workflows/chaos-test.yml create mode 100644 graft/evm/.golangci.yml create mode 100644 tests/reexecute/chaos/deps.go create mode 100644 tests/reexecute/chaos/main.go diff --git a/.github/workflows/chaos-test.json b/.github/workflows/chaos-test.json new file mode 100644 index 000000000000..c85e1edb257e --- /dev/null +++ b/.github/workflows/chaos-test.json @@ -0,0 +1,30 @@ +{ + "pull_request": { + "include": [ + { + "start-block": "101", + "end-block": "200000", + "block-dir-src": "cchain-mainnet-blocks-1m-ldb", + "current-state-dir-src": "cchain-current-state-firewood-100", + "min-wait-time": "120s", + "max-wait-time": "150s", + "runner": "ubuntu-latest", + "timeout-minutes": 60 + } + ] + }, + "schedule": { + "include": [ + { + "start-block": "101", + "end-block": "200000", + "block-dir-src": "cchain-mainnet-blocks-1m-ldb", + "current-state-dir-src": "cchain-current-state-firewood-100", + "min-wait-time": "120s", + "max-wait-time": "150s", + "runner": "ubuntu-latest", + "timeout-minutes": 60 + } + ] + } +} diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/chaos-test.yml new file mode 100644 index 000000000000..f78bca470d79 --- /dev/null +++ b/.github/workflows/chaos-test.yml @@ 
-0,0 +1,108 @@ +name: Firewood Chaos Test + +on: + workflow_dispatch: + inputs: + start-block: + description: 'The start block for the chaos test.' + default: '' + end-block: + description: 'The end block for the chaos test.' + default: '' + block-dir-src: + description: 'The source block directory. Supports S3 directory/zip and local directories.' + default: '' + current-state-dir-src: + description: 'The current state directory. Supports S3 directory/zip and local directories.' + default: '' + min-wait-time: + description: 'Minimum wait time before killing the process (e.g., 120s, 2m).' + default: '120s' + max-wait-time: + description: 'Maximum wait time before killing the process (e.g., 150s, 3m).' + default: '150s' + runner: + description: 'Runner to execute the chaos test. Input to the runs-on field of the job.' + required: true + timeout-minutes: + description: 'Timeout in minutes for the job.' + default: '60' + # XXX: remove this before merging + pull_request: + schedule: + - cron: '0 9 * * *' # Runs every day at 09:00 UTC (04:00 EST) + +jobs: + define-matrix: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.define-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + - name: Define Matrix + id: define-matrix + shell: bash -x {0} + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + { + echo "matrix<> "$GITHUB_OUTPUT" + else + json_string=$(jq -r ".\"${{ github.event_name }}\"" .github/workflows/chaos-test.json) + { + echo "matrix<> "$GITHUB_OUTPUT" + fi + + firewood-chaos-test: + needs: define-matrix + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix) }} + timeout-minutes: ${{ matrix.timeout-minutes }} + runs-on: ${{ matrix.runner }} + permissions: + id-token: write + contents: read + steps: + - uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f #v31 + with: + github_access_token: ${{ secrets.GITHUB_TOKEN }} + - name: Configure AWS Credentials + uses: 
aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_S3_READ_ONLY_ROLE }} + aws-region: 'us-east-2' + role-duration-seconds: '43200' + - uses: actions/checkout@v4 + - name: Set task env + shell: bash + run: | + TIMESTAMP=$(date '+%Y%m%d-%H%M%S') + echo "EXECUTION_DATA_DIR=/tmp/reexecution-data-${TIMESTAMP}" >> "$GITHUB_ENV" + - name: Run chaos test with Firewood + shell: nix develop --impure --command bash -x {0} + run: | + TIMESTAMP=$(date +%s) + EXECUTION_DATA_DIR="/tmp/reexecution-data-${TIMESTAMP}" + ./scripts/run_task.sh firewood-chaos-test-with-copied-data \ + START_BLOCK=${{ matrix.start-block }} \ + END_BLOCK=${{ matrix.end-block }} \ + BLOCK_DIR_SRC=${{ matrix.block-dir-src }} \ + EXECUTION_DATA_DIR=$EXECUTION_DATA_DIR \ + CURRENT_STATE_DIR_SRC=${{ matrix.current-state-dir-src }} \ + MIN_WAIT_TIME=${{ matrix.min-wait-time }} \ + MAX_WAIT_TIME=${{ matrix.max-wait-time }} + diff --git a/Taskfile.yml b/Taskfile.yml index 230daca54dd0..146ec63d027e 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -117,6 +117,45 @@ tasks: cmds: - cmd: bash -x ./scripts/copy_dir.sh {{.SRC}} {{.DST}} + firewood-chaos-test: + desc: Chaos test during reexecution test with Firewood + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' + BLOCK_DIR: '{{.BLOCK_DIR}}' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' + cmd: go run ./tests/reexecute/chaos --start-block={{.START_BLOCK}} + --end-block={{.END_BLOCK}} --current-state-dir={{.CURRENT_STATE_DIR}} + --block-dir={{.BLOCK_DIR}} --min-wait-time={{.MIN_WAIT_TIME}} + --max-wait-time={{.MAX_WAIT_TIME}} + + firewood-chaos-test-with-copied-data: + desc: Combines import-cchain-reexecute-range and firewood-chaos-test + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' + EXECUTION_DATA_DIR: 
'{{.EXECUTION_DATA_DIR}}' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' + cmds: + - task: import-cchain-reexecute-range + vars: + BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' + CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + - task: firewood-chaos-test + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' + BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' + generate-mocks: desc: Generates testing mocks cmds: diff --git a/graft/evm/.golangci.yml b/graft/evm/.golangci.yml new file mode 100644 index 000000000000..c85b064589b8 --- /dev/null +++ b/graft/evm/.golangci.yml @@ -0,0 +1,77 @@ +version: "2" +run: + tests: true + +linters: + default: none + enable: + - bidichk + - copyloopvar + - durationcheck + - gocheckcompilerdirectives + - govet + - ineffassign + - mirror + - misspell + - reassign + # - revive # only certain checks enabled + - staticcheck + - unconvert + - unused + - whitespace + settings: + goconst: + min-len: 3 # minimum length of string constant + min-occurrences: 6 # minimum number of occurrences + staticcheck: + checks: + - all + + # There's a lot of legacy code that triggers these warnings after upgrading golangci-lint. + # These checks are removed. + - -QF1001 # Apply De Morgan’s law + - -QF1002 # Convert an untagged switch comparing the same variable into a “tagged” switch + - -QF1003 # Convert if / else-if chains comparing the same variable into a tagged switch + - -QF1006 # Lift if + break into loop condition + - -QF1008 # Omit embedded fields from selector expression + - -QF1010 # Convert slice of bytes to string when printing it + - -SA1019 # Use of deprecated identifiers: triggers when you import or use something marked deprecated. 
+ - -SA4009 # A function argument is overwritten before its first use. + - -SA9003 # Empty body in an if or else branch. + - -ST1003 # Poorly chosen identifier. + - -ST1008 # A function’s error value should be its last return value. + - -ST1016 # Use consistent method receiver names (e.g. avoid mixing `s *S`, `s S`, `t *T` in same type). + + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + rules: + - linters: + - unused + # Exclude unused geth symbols from ethclient/ethclient.go to simplify upgrading Geth to a newer version. + path: ethclient\/ethclient\.go + text: (type `rpcProgress`|func `\(\*rpcProgress\)\.toSyncProgress`) is unused + - linters: + - goconst + path: (.+)_test\.go + paths: + - third_party$ + - builtin$ + - examples$ + +formatters: + enable: + - goimports + settings: + gofmt: + simplify: true + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/graft/evm/go.mod b/graft/evm/go.mod index 3efcac388124..736b3514d33c 100644 --- a/graft/evm/go.mod +++ b/graft/evm/go.mod @@ -2,6 +2,7 @@ module github.com/ava-labs/avalanchego/graft/evm go 1.24.9 +<<<<<<< HEAD require ( github.com/ava-labs/firewood-go-ethhash/ffi v0.0.17 github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 @@ -63,6 +64,14 @@ require ( google.golang.org/protobuf v1.36.5 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/tmplfunc v0.0.3 // indirect +======= +require github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 + +require ( + github.com/holiman/uint256 v1.2.4 // indirect + golang.org/x/crypto v0.45.0 // indirect + golang.org/x/sys v0.38.0 // indirect +>>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) ) replace github.com/ava-labs/avalanchego => ../../ diff --git a/graft/evm/go.sum b/graft/evm/go.sum index 805f05174f1d..b5d2458f5964 100644 --- a/graft/evm/go.sum +++ b/graft/evm/go.sum @@ -1,3 +1,4 @@ +<<<<<<< HEAD cloud.google.com/go 
v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -424,3 +425,13 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU= rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= +======= +github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 h1:hQ15IJxY7WOKqeJqCXawsiXh0NZTzmoQOemkWHz7rr4= +github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2/go.mod h1:DqSotSn4Dx/UJV+d3svfW8raR+cH7+Ohl9BpsQ5HlGU= +github.com/holiman/uint256 v1.2.4 h1:jUc4Nk8fm9jZabQuqr2JzednajVmBpC+oiTiXZJEApU= +github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXeiRV4ng7E= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +>>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) diff --git a/tests/reexecute/c/vm_reexecute.go b/tests/reexecute/c/vm_reexecute.go index 5155861dacf1..45d65b282d41 100644 --- a/tests/reexecute/c/vm_reexecute.go +++ b/tests/reexecute/c/vm_reexecute.go @@ -262,6 +262,15 @@ func benchmarkReexecuteRange( benchmarkTool := newBenchmarkTool(benchmarkName) getTopLevelMetrics(tc, benchmarkTool, prefixGatherer, elapsed) // Report the desired top-level metrics +<<<<<<< HEAD +======= + + benchmarkTool.logResults(log) + if len(benchmarkOutputFile) != 0 { + r.NoError(benchmarkTool.saveToFile(benchmarkOutputFile)) + } +} 
+>>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) benchmarkTool.logResults(log) if len(benchmarkOutputFile) != 0 { diff --git a/tests/reexecute/chaos/deps.go b/tests/reexecute/chaos/deps.go new file mode 100644 index 000000000000..4ab1fd53769e --- /dev/null +++ b/tests/reexecute/chaos/deps.go @@ -0,0 +1,117 @@ +// Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package main + +import ( + "context" + "fmt" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/ava-labs/avalanchego/api/metrics" + "github.com/ava-labs/avalanchego/chains/atomic" + "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/database/prefixdb" + "github.com/ava-labs/avalanchego/genesis" + "github.com/ava-labs/avalanchego/graft/coreth/plugin/factory" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/snow" + "github.com/ava-labs/avalanchego/snow/engine/enginetest" + "github.com/ava-labs/avalanchego/snow/engine/snowman/block" + "github.com/ava-labs/avalanchego/snow/validators/validatorstest" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/upgrade" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner" + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/vms/metervm" + "github.com/ava-labs/avalanchego/vms/platformvm/warp" +) + +var ( + mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") + mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") + mainnetAvaxAssetID = ids.FromStringOrPanic("FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z") +) + +func newMainnetCChainVM( + ctx context.Context, + vmAndSharedMemoryDB database.Database, + chainDataDir string, + configBytes []byte, + vmMultiGatherer metrics.MultiGatherer, + meterVMRegistry prometheus.Registerer, 
+) (block.ChainVM, error) { + factory := factory.Factory{} + vmIntf, err := factory.New(logging.NoLog{}) + if err != nil { + return nil, fmt.Errorf("failed to create VM from factory: %w", err) + } + vm := vmIntf.(block.ChainVM) + + blsKey, err := localsigner.New() + if err != nil { + return nil, fmt.Errorf("failed to create BLS key: %w", err) + } + + blsPublicKey := blsKey.PublicKey() + warpSigner := warp.NewSigner(blsKey, constants.MainnetID, mainnetCChainID) + + genesisConfig := genesis.GetConfig(constants.MainnetID) + + sharedMemoryDB := prefixdb.New([]byte("sharedmemory"), vmAndSharedMemoryDB) + atomicMemory := atomic.NewMemory(sharedMemoryDB) + + chainIDToSubnetID := map[ids.ID]ids.ID{ + mainnetXChainID: constants.PrimaryNetworkID, + mainnetCChainID: constants.PrimaryNetworkID, + ids.Empty: constants.PrimaryNetworkID, + } + + vm = metervm.NewBlockVM(vm, meterVMRegistry) + + if err := vm.Initialize( + ctx, + &snow.Context{ + NetworkID: constants.MainnetID, + SubnetID: constants.PrimaryNetworkID, + ChainID: mainnetCChainID, + NodeID: ids.GenerateTestNodeID(), + PublicKey: blsPublicKey, + NetworkUpgrades: upgrade.Mainnet, + + XChainID: mainnetXChainID, + CChainID: mainnetCChainID, + AVAXAssetID: mainnetAvaxAssetID, + + Log: tests.NewDefaultLogger("mainnet-vm-reexecution"), + SharedMemory: atomicMemory.NewSharedMemory(mainnetCChainID), + BCLookup: ids.NewAliaser(), + Metrics: vmMultiGatherer, + + WarpSigner: warpSigner, + + ValidatorState: &validatorstest.State{ + GetSubnetIDF: func(_ context.Context, chainID ids.ID) (ids.ID, error) { + subnetID, ok := chainIDToSubnetID[chainID] + if ok { + return subnetID, nil + } + return ids.Empty, fmt.Errorf("unknown chainID: %s", chainID) + }, + }, + ChainDataDir: chainDataDir, + }, + prefixdb.New([]byte("vm"), vmAndSharedMemoryDB), + []byte(genesisConfig.CChainGenesis), + nil, + configBytes, + nil, + &enginetest.Sender{}, + ); err != nil { + return nil, fmt.Errorf("failed to initialize VM: %w", err) + } + + return vm, nil +} 
diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go new file mode 100644 index 000000000000..e018c7caf97a --- /dev/null +++ b/tests/reexecute/chaos/main.go @@ -0,0 +1,218 @@ +// Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package main + +import ( + "flag" + "fmt" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/ava-labs/avalanchego/api/metrics" + "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/database/leveldb" + "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/utils/logging" +) + +var ( + blockDirArg string + currentStateDirArg string + startBlockArg uint64 + endBlockArg uint64 + minWaitTimeArg time.Duration + maxWaitTimeArg time.Duration + + firewoodConfig = `{ + "state-scheme": "firewood", + "snapshot-cache": 0, + "pruning-enabled": true, + "state-sync-enabled": false + }` +) + +func init() { + evm.RegisterAllLibEVMExtras() + + flag.StringVar(&blockDirArg, "block-dir", blockDirArg, "Block DB directory to read from during re-execution.") + flag.StringVar(&currentStateDirArg, "current-state-dir", currentStateDirArg, "Current state directory including VM DB and Chain Data Directory for re-execution.") + flag.Uint64Var(&startBlockArg, "start-block", 101, "Start block to begin execution (exclusive).") + flag.Uint64Var(&endBlockArg, "end-block", 200, "End block to end execution (inclusive).") + flag.DurationVar(&minWaitTimeArg, "min-wait-time", 20*time.Second, "Minimum amount of time to wait before crashing.") + flag.DurationVar(&maxWaitTimeArg, "max-wait-time", 30*time.Second, "Maximum amount of time to wait before crashing.") + + flag.Parse() +} + +func main() { + tc := 
tests.NewTestContext(tests.NewDefaultLogger("chaos-test")) + tc.SetDefaultContextParent(tests.DefaultNotifyContext(0, tc.DeferCleanup)) + tc.RecoverAndExit() + + run( + tc, + minWaitTimeArg, + maxWaitTimeArg, + blockDirArg, + currentStateDirArg, + startBlockArg, + endBlockArg, + ) +} + +// run executes a chaos test that simulates an application crash during C-Chain +// block reexecution that uses Firewood. It verifies that the VM can recover from +// an unexpected termination and resume processing from the correct block height +// using persisted state. +// +// Running the chaos test involves a few steps: +// 1. Start a reexecution test process using the Firewood state scheme +// 2. Allow the reexecution test to run for the specified wait duration +// 3. Forcefully terminate the process with SIGKILL to simulate a crash +// 4. Open the VM database to read the last accepted block height from persisted state +// 5. Restart the reexecution test from the recovered height to verify state consistency +func run( + tc tests.TestContext, + minWaitTime time.Duration, + maxWaitTime time.Duration, + blockDir string, + currentStateDir string, + startBlock uint64, + endBlock uint64, +) { + r := require.New(tc) + ctx := tc.GetDefaultContextParent() + log := tc.Log() + + cmd := createReexecutionCmd(blockDir, currentStateDir, startBlock, endBlock) + // Set process group ID so we can kill all child processes + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + + // 1. Start a reexecution test process using the Firewood state scheme + r.NoError(cmd.Start()) + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + // 2. Allow the reexecution test to run for the specified wait duration + waitTime := time.Duration(rand.Int63n(int64(maxWaitTime-minWaitTime)+1)) + minWaitTime + log.Debug("started reexecution test", zap.Duration("wait time", waitTime)) + + time.Sleep(waitTime) + + // 3. 
Forcefully terminate the process with SIGKILL to simulate a crash + select { + case waitErr := <-done: + r.FailNow("reexecution test terminated prior to crash test", zap.Error(waitErr)) + default: + pgid, err := syscall.Getpgid(cmd.Process.Pid) + r.NoError(err) + + log.Debug("killing reexecution test") + + r.NoError(syscall.Kill(-pgid, syscall.SIGKILL)) + + waitErr := <-done + r.Error(waitErr) + + exitErr, ok := waitErr.(*exec.ExitError) + r.True(ok) + + // ExitCode() returns -1 when killed by signal + r.Equal(-1, exitErr.ProcessState.ExitCode(), "unexpected exit code after kill") + } + + var ( + vmDBDir = filepath.Join(currentStateDir, "db") + chainDataDir = filepath.Join(currentStateDir, "chain-data-dir") + ) + + // 4. Open the VM database to read the last accepted block height from persisted state + db, err := openDB(vmDBDir, 10) + r.NoError(err) + + vm, err := newMainnetCChainVM( + ctx, + db, + chainDataDir, + []byte(firewoodConfig), + metrics.NewPrefixGatherer(), + prometheus.NewRegistry(), + ) + r.NoError(err) + + lastAcceptedID, err := vm.LastAccepted(ctx) + r.NoError(err) + + lastAcceptedBlock, err := vm.GetBlock(ctx, lastAcceptedID) + r.NoError(err) + + r.NoError(vm.Shutdown(ctx)) + r.NoError(db.Close()) + + log.Debug("read VM", zap.Uint64("latest height", lastAcceptedBlock.Height())) + + cmd = createReexecutionCmd(blockDir, currentStateDir, lastAcceptedBlock.Height()+1, endBlock) + + // 5. Restart the reexecution test from the recovered height to verify state consistency + r.NoError(cmd.Run()) +} + +// openDB attempts to open a LevelDB database with retry logic and exponential backoff. +// This is necessary after killing a process that held the database open, as the OS may +// need time to release file locks even after the process terminates. +// +// The backoff strategy increases by 500ms per attempt (500ms, 1s, 1.5s, 2s, ...). 
+func openDB(dbDir string, maxAttempts int) (database.Database, error) { + attempt := 0 + for { + db, err := leveldb.New(dbDir, nil, logging.NoLog{}, prometheus.NewRegistry()) + if err == nil { + return db, nil + } + + attempt += 1 + if attempt == maxAttempts { + return nil, fmt.Errorf("failed to reopen db after %d attempts: %w", maxAttempts, err) + } + + backoff := time.Duration(attempt) * 500 * time.Millisecond + time.Sleep(backoff) + } +} + +// createReexecutionCmd constructs a command to run the C-Chain reexecution test. +func createReexecutionCmd( + blockDir string, + currentStateDir string, + startBlock uint64, + endBlock uint64, +) *exec.Cmd { + cmd := exec.Command("go", + "run", + "github.com/ava-labs/avalanchego/tests/reexecute/c", + "--config=firewood", + "--block-dir="+blockDir, + "--current-state-dir="+currentStateDir, + "--start-block="+strconv.Itoa(int(startBlock)), + "--end-block="+strconv.Itoa(int(endBlock)), + ) + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + return cmd +} From 5c7ec5c290e0f9a91c1767d29d9e327912a862d6 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 16 Dec 2025 17:38:24 -0500 Subject: [PATCH 03/24] chore: exponential => linear --- tests/reexecute/chaos/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index e018c7caf97a..4e3350abfc2a 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -171,7 +171,7 @@ func run( r.NoError(cmd.Run()) } -// openDB attempts to open a LevelDB database with retry logic and exponential backoff. +// openDB attempts to open a LevelDB database with retry logic and linear backoff. // This is necessary after killing a process that held the database open, as the OS may // need time to release file locks even after the process terminates. 
// From 9a893dac3c466f45754ede1295540382826364a5 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 16 Dec 2025 17:44:08 -0500 Subject: [PATCH 04/24] chore: add timeout when waiting for killed process to terminate --- tests/reexecute/chaos/main.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index 4e3350abfc2a..ed0a9a5b74ec 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -92,7 +92,6 @@ func run( endBlock uint64, ) { r := require.New(tc) - ctx := tc.GetDefaultContextParent() log := tc.Log() cmd := createReexecutionCmd(blockDir, currentStateDir, startBlock, endBlock) @@ -125,8 +124,15 @@ func run( r.NoError(syscall.Kill(-pgid, syscall.SIGKILL)) - waitErr := <-done - r.Error(waitErr) + waitCtx := tc.DefaultContext() + + var waitErr error + select { + case err := <-done: + waitErr = err + case <-waitCtx.Done(): + r.FailNow("timed out waiting for killed process to terminate") + } exitErr, ok := waitErr.(*exec.ExitError) r.True(ok) @@ -144,6 +150,7 @@ func run( db, err := openDB(vmDBDir, 10) r.NoError(err) + ctx := tc.GetDefaultContextParent() vm, err := newMainnetCChainVM( ctx, db, From 4e1b5acabcf0d13386addd8338c5040c41a07443 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 08:01:57 -0500 Subject: [PATCH 05/24] chore: ctx --- tests/reexecute/chaos/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index ed0a9a5b74ec..809160e619d2 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -4,6 +4,7 @@ package main import ( + "context" "flag" "fmt" "math/rand" @@ -57,7 +58,7 @@ func init() { func main() { tc := tests.NewTestContext(tests.NewDefaultLogger("chaos-test")) - tc.SetDefaultContextParent(tests.DefaultNotifyContext(0, tc.DeferCleanup)) + tc.SetDefaultContextParent(context.Background()) 
tc.RecoverAndExit() run( From 7b6d057c9c7d7f41bf403842178467632c522125 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 10:19:19 -0500 Subject: [PATCH 06/24] chore: rebase nits --- graft/evm/go.mod | 9 --------- graft/evm/go.sum | 11 ----------- tests/reexecute/c/vm_reexecute.go | 9 --------- 3 files changed, 29 deletions(-) diff --git a/graft/evm/go.mod b/graft/evm/go.mod index 736b3514d33c..3efcac388124 100644 --- a/graft/evm/go.mod +++ b/graft/evm/go.mod @@ -2,7 +2,6 @@ module github.com/ava-labs/avalanchego/graft/evm go 1.24.9 -<<<<<<< HEAD require ( github.com/ava-labs/firewood-go-ethhash/ffi v0.0.17 github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 @@ -64,14 +63,6 @@ require ( google.golang.org/protobuf v1.36.5 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/tmplfunc v0.0.3 // indirect -======= -require github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 - -require ( - github.com/holiman/uint256 v1.2.4 // indirect - golang.org/x/crypto v0.45.0 // indirect - golang.org/x/sys v0.38.0 // indirect ->>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) ) replace github.com/ava-labs/avalanchego => ../../ diff --git a/graft/evm/go.sum b/graft/evm/go.sum index b5d2458f5964..805f05174f1d 100644 --- a/graft/evm/go.sum +++ b/graft/evm/go.sum @@ -1,4 +1,3 @@ -<<<<<<< HEAD cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -425,13 +424,3 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU= rsc.io/tmplfunc v0.0.3/go.mod 
h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= -======= -github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2 h1:hQ15IJxY7WOKqeJqCXawsiXh0NZTzmoQOemkWHz7rr4= -github.com/ava-labs/libevm v1.13.15-0.20251016142715-1bccf4f2ddb2/go.mod h1:DqSotSn4Dx/UJV+d3svfW8raR+cH7+Ohl9BpsQ5HlGU= -github.com/holiman/uint256 v1.2.4 h1:jUc4Nk8fm9jZabQuqr2JzednajVmBpC+oiTiXZJEApU= -github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXeiRV4ng7E= -golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= -golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= ->>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) diff --git a/tests/reexecute/c/vm_reexecute.go b/tests/reexecute/c/vm_reexecute.go index 45d65b282d41..5155861dacf1 100644 --- a/tests/reexecute/c/vm_reexecute.go +++ b/tests/reexecute/c/vm_reexecute.go @@ -262,15 +262,6 @@ func benchmarkReexecuteRange( benchmarkTool := newBenchmarkTool(benchmarkName) getTopLevelMetrics(tc, benchmarkTool, prefixGatherer, elapsed) // Report the desired top-level metrics -<<<<<<< HEAD -======= - - benchmarkTool.logResults(log) - if len(benchmarkOutputFile) != 0 { - r.NoError(benchmarkTool.saveToFile(benchmarkOutputFile)) - } -} ->>>>>>> e1cf68fed2 (test(reexecute): add firewood chaos test) benchmarkTool.logResults(log) if len(benchmarkOutputFile) != 0 { From c8e8582d946da90f784e1adabd823197f44f1f5e Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 10:21:31 -0500 Subject: [PATCH 07/24] chore: remove unnecessary diff --- graft/evm/.golangci.yml | 77 ----------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 graft/evm/.golangci.yml diff --git a/graft/evm/.golangci.yml b/graft/evm/.golangci.yml deleted file mode 100644 index c85b064589b8..000000000000 --- 
a/graft/evm/.golangci.yml +++ /dev/null @@ -1,77 +0,0 @@ -version: "2" -run: - tests: true - -linters: - default: none - enable: - - bidichk - - copyloopvar - - durationcheck - - gocheckcompilerdirectives - - govet - - ineffassign - - mirror - - misspell - - reassign - # - revive # only certain checks enabled - - staticcheck - - unconvert - - unused - - whitespace - settings: - goconst: - min-len: 3 # minimum length of string constant - min-occurrences: 6 # minimum number of occurrences - staticcheck: - checks: - - all - - # There's a lot of legacy code that triggers these warnings after upgrading golangci-lint. - # These checks are removed. - - -QF1001 # Apply De Morgan’s law - - -QF1002 # Convert an untagged switch comparing the same variable into a “tagged” switch - - -QF1003 # Convert if / else-if chains comparing the same variable into a tagged switch - - -QF1006 # Lift if + break into loop condition - - -QF1008 # Omit embedded fields from selector expression - - -QF1010 # Convert slice of bytes to string when printing it - - -SA1019 # Use of deprecated identifiers: triggers when you import or use something marked deprecated. - - -SA4009 # A function argument is overwritten before its first use. - - -SA9003 # Empty body in an if or else branch. - - -ST1003 # Poorly chosen identifier. - - -ST1008 # A function’s error value should be its last return value. - - -ST1016 # Use consistent method receiver names (e.g. avoid mixing `s *S`, `s S`, `t *T` in same type). - - exclusions: - generated: lax - presets: - - comments - - common-false-positives - - legacy - - std-error-handling - rules: - - linters: - - unused - # Exclude unused geth symbols from ethclient/ethclient.go to simplify upgrading Geth to a newer version. 
- path: ethclient\/ethclient\.go - text: (type `rpcProgress`|func `\(\*rpcProgress\)\.toSyncProgress`) is unused - - linters: - - goconst - path: (.+)_test\.go - paths: - - third_party$ - - builtin$ - - examples$ - -formatters: - enable: - - goimports - settings: - gofmt: - simplify: true - exclusions: - generated: lax - paths: - - third_party$ - - builtin$ - - examples$ From 506a2e3ca586c7859adb0f6d3bfc76c8a4a70565 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 10:30:16 -0500 Subject: [PATCH 08/24] chore: remove deps.go --- tests/reexecute/chaos/deps.go | 117 ---------------------------------- tests/reexecute/chaos/main.go | 3 +- 2 files changed, 2 insertions(+), 118 deletions(-) delete mode 100644 tests/reexecute/chaos/deps.go diff --git a/tests/reexecute/chaos/deps.go b/tests/reexecute/chaos/deps.go deleted file mode 100644 index 4ab1fd53769e..000000000000 --- a/tests/reexecute/chaos/deps.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. 
- -package main - -import ( - "context" - "fmt" - - "github.com/prometheus/client_golang/prometheus" - - "github.com/ava-labs/avalanchego/api/metrics" - "github.com/ava-labs/avalanchego/chains/atomic" - "github.com/ava-labs/avalanchego/database" - "github.com/ava-labs/avalanchego/database/prefixdb" - "github.com/ava-labs/avalanchego/genesis" - "github.com/ava-labs/avalanchego/graft/coreth/plugin/factory" - "github.com/ava-labs/avalanchego/ids" - "github.com/ava-labs/avalanchego/snow" - "github.com/ava-labs/avalanchego/snow/engine/enginetest" - "github.com/ava-labs/avalanchego/snow/engine/snowman/block" - "github.com/ava-labs/avalanchego/snow/validators/validatorstest" - "github.com/ava-labs/avalanchego/tests" - "github.com/ava-labs/avalanchego/upgrade" - "github.com/ava-labs/avalanchego/utils/constants" - "github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner" - "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/vms/metervm" - "github.com/ava-labs/avalanchego/vms/platformvm/warp" -) - -var ( - mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") - mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") - mainnetAvaxAssetID = ids.FromStringOrPanic("FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z") -) - -func newMainnetCChainVM( - ctx context.Context, - vmAndSharedMemoryDB database.Database, - chainDataDir string, - configBytes []byte, - vmMultiGatherer metrics.MultiGatherer, - meterVMRegistry prometheus.Registerer, -) (block.ChainVM, error) { - factory := factory.Factory{} - vmIntf, err := factory.New(logging.NoLog{}) - if err != nil { - return nil, fmt.Errorf("failed to create VM from factory: %w", err) - } - vm := vmIntf.(block.ChainVM) - - blsKey, err := localsigner.New() - if err != nil { - return nil, fmt.Errorf("failed to create BLS key: %w", err) - } - - blsPublicKey := blsKey.PublicKey() - warpSigner := warp.NewSigner(blsKey, 
constants.MainnetID, mainnetCChainID) - - genesisConfig := genesis.GetConfig(constants.MainnetID) - - sharedMemoryDB := prefixdb.New([]byte("sharedmemory"), vmAndSharedMemoryDB) - atomicMemory := atomic.NewMemory(sharedMemoryDB) - - chainIDToSubnetID := map[ids.ID]ids.ID{ - mainnetXChainID: constants.PrimaryNetworkID, - mainnetCChainID: constants.PrimaryNetworkID, - ids.Empty: constants.PrimaryNetworkID, - } - - vm = metervm.NewBlockVM(vm, meterVMRegistry) - - if err := vm.Initialize( - ctx, - &snow.Context{ - NetworkID: constants.MainnetID, - SubnetID: constants.PrimaryNetworkID, - ChainID: mainnetCChainID, - NodeID: ids.GenerateTestNodeID(), - PublicKey: blsPublicKey, - NetworkUpgrades: upgrade.Mainnet, - - XChainID: mainnetXChainID, - CChainID: mainnetCChainID, - AVAXAssetID: mainnetAvaxAssetID, - - Log: tests.NewDefaultLogger("mainnet-vm-reexecution"), - SharedMemory: atomicMemory.NewSharedMemory(mainnetCChainID), - BCLookup: ids.NewAliaser(), - Metrics: vmMultiGatherer, - - WarpSigner: warpSigner, - - ValidatorState: &validatorstest.State{ - GetSubnetIDF: func(_ context.Context, chainID ids.ID) (ids.ID, error) { - subnetID, ok := chainIDToSubnetID[chainID] - if ok { - return subnetID, nil - } - return ids.Empty, fmt.Errorf("unknown chainID: %s", chainID) - }, - }, - ChainDataDir: chainDataDir, - }, - prefixdb.New([]byte("vm"), vmAndSharedMemoryDB), - []byte(genesisConfig.CChainGenesis), - nil, - configBytes, - nil, - &enginetest.Sender{}, - ); err != nil { - return nil, fmt.Errorf("failed to initialize VM: %w", err) - } - - return vm, nil -} diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index 809160e619d2..3edaf6c53433 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -24,6 +24,7 @@ import ( "github.com/ava-labs/avalanchego/database/leveldb" "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm" "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/tests/reexecute" 
"github.com/ava-labs/avalanchego/utils/logging" ) @@ -152,7 +153,7 @@ func run( r.NoError(err) ctx := tc.GetDefaultContextParent() - vm, err := newMainnetCChainVM( + vm, err := reexecute.NewMainnetCChainVM( ctx, db, chainDataDir, From 98273a08a79f01b6a9cf89a1e9f6875c820824f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 16:30:48 -0500 Subject: [PATCH 09/24] chore: rename task --- .github/workflows/chaos-test.yml | 2 +- Taskfile.yml | 78 ++++++++++++++++---------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/chaos-test.yml index f78bca470d79..b10ad9ca15d8 100644 --- a/.github/workflows/chaos-test.yml +++ b/.github/workflows/chaos-test.yml @@ -97,7 +97,7 @@ jobs: run: | TIMESTAMP=$(date +%s) EXECUTION_DATA_DIR="/tmp/reexecution-data-${TIMESTAMP}" - ./scripts/run_task.sh firewood-chaos-test-with-copied-data \ + ./scripts/run_task.sh test-firewood-chaos-with-copied-data \ START_BLOCK=${{ matrix.start-block }} \ END_BLOCK=${{ matrix.end-block }} \ BLOCK_DIR_SRC=${{ matrix.block-dir-src }} \ diff --git a/Taskfile.yml b/Taskfile.yml index 146ec63d027e..528961739666 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -117,45 +117,6 @@ tasks: cmds: - cmd: bash -x ./scripts/copy_dir.sh {{.SRC}} {{.DST}} - firewood-chaos-test: - desc: Chaos test during reexecution test with Firewood - vars: - START_BLOCK: '{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' - BLOCK_DIR: '{{.BLOCK_DIR}}' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' - cmd: go run ./tests/reexecute/chaos --start-block={{.START_BLOCK}} - --end-block={{.END_BLOCK}} --current-state-dir={{.CURRENT_STATE_DIR}} - --block-dir={{.BLOCK_DIR}} --min-wait-time={{.MIN_WAIT_TIME}} - --max-wait-time={{.MAX_WAIT_TIME}} - - firewood-chaos-test-with-copied-data: - desc: Combines import-cchain-reexecute-range and firewood-chaos-test - vars: - START_BLOCK: 
'{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' - cmds: - - task: import-cchain-reexecute-range - vars: - BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' - CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - - task: firewood-chaos-test - vars: - START_BLOCK: '{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' - BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' - generate-mocks: desc: Generates testing mocks cmds: @@ -433,6 +394,45 @@ tasks: E2E_SERIAL: 1 cmds: - cmd: bash -x ./scripts/tests.e2e.kube.sh --ginkgo.focus-file=xsvm.go {{.CLI_ARGS}} + + test-firewood-chaos: + desc: Chaos test during reexecution test with Firewood + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' + BLOCK_DIR: '{{.BLOCK_DIR}}' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' + cmd: go run ./tests/reexecute/chaos --start-block={{.START_BLOCK}} + --end-block={{.END_BLOCK}} --current-state-dir={{.CURRENT_STATE_DIR}} + --block-dir={{.BLOCK_DIR}} --min-wait-time={{.MIN_WAIT_TIME}} + --max-wait-time={{.MAX_WAIT_TIME}} + + test-firewood-chaos-with-copied-data: + desc: Combines import-cchain-reexecute-range and firewood-chaos-test + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' + cmds: + - task: import-cchain-reexecute-range + vars: + 
BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' + CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + - task: firewood-chaos-test + vars: + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' + BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' + MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' + MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' # To use a different fuzz time, run `task test-fuzz FUZZTIME=[value in seconds]`. # A value of `-1` will run until it encounters a failing output. From d9899109dec4708997f88d0593ab3e7b01635416 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 17 Dec 2025 16:33:50 -0500 Subject: [PATCH 10/24] chore: nit --- Taskfile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Taskfile.yml b/Taskfile.yml index 528961739666..42a7fc0cab16 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -425,7 +425,7 @@ tasks: BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - - task: firewood-chaos-test + - task: test-firewood-chaos vars: START_BLOCK: '{{.START_BLOCK}}' END_BLOCK: '{{.END_BLOCK}}' From 00a4852cb5f381bac7bec23f9dd266f58a8d371a Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 09:16:53 -0500 Subject: [PATCH 11/24] refactor!: replace task logic with script --- .github/workflows/chaos-test.json | 14 +--- .github/workflows/chaos-test.yml | 36 ++++----- Taskfile.yml | 39 ---------- scripts/tests.firewood_chaos.sh | 124 ++++++++++++++++++++++++++++++ tests/reexecute/chaos/main.go | 2 +- 5 files changed, 144 insertions(+), 71 deletions(-) create mode 100755 scripts/tests.firewood_chaos.sh diff --git a/.github/workflows/chaos-test.json b/.github/workflows/chaos-test.json index c85e1edb257e..7f9917178ad8 100644 --- 
a/.github/workflows/chaos-test.json +++ b/.github/workflows/chaos-test.json @@ -2,12 +2,7 @@ "pull_request": { "include": [ { - "start-block": "101", - "end-block": "200000", - "block-dir-src": "cchain-mainnet-blocks-1m-ldb", - "current-state-dir-src": "cchain-current-state-firewood-100", - "min-wait-time": "120s", - "max-wait-time": "150s", + "test": "101-250k", "runner": "ubuntu-latest", "timeout-minutes": 60 } @@ -16,12 +11,7 @@ "schedule": { "include": [ { - "start-block": "101", - "end-block": "200000", - "block-dir-src": "cchain-mainnet-blocks-1m-ldb", - "current-state-dir-src": "cchain-current-state-firewood-100", - "min-wait-time": "120s", - "max-wait-time": "150s", + "test": "101-250k", "runner": "ubuntu-latest", "timeout-minutes": 60 } diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/chaos-test.yml index b10ad9ca15d8..75616f1e9a70 100644 --- a/.github/workflows/chaos-test.yml +++ b/.github/workflows/chaos-test.yml @@ -3,11 +3,15 @@ name: Firewood Chaos Test on: workflow_dispatch: inputs: + test: + description: 'Test name to run (e.g., 101-250k). Leave empty to use custom inputs below.' + default: '' + # Custom inputs (used when test is not provided) start-block: - description: 'The start block for the chaos test.' + description: 'The start block for the benchmark.' default: '' end-block: - description: 'The end block for the chaos test.' + description: 'The end block for the benchmark.' default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' @@ -15,6 +19,7 @@ on: current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' default: '' + # Chaos test specific inputs min-wait-time: description: 'Minimum wait time before killing the process (e.g., 120s, 2m).' 
default: '120s' @@ -46,7 +51,8 @@ jobs: if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then { echo "matrix<> "$GITHUB_ENV" - name: Run chaos test with Firewood shell: nix develop --impure --command bash -x {0} - run: | - TIMESTAMP=$(date +%s) - EXECUTION_DATA_DIR="/tmp/reexecution-data-${TIMESTAMP}" - ./scripts/run_task.sh test-firewood-chaos-with-copied-data \ - START_BLOCK=${{ matrix.start-block }} \ - END_BLOCK=${{ matrix.end-block }} \ - BLOCK_DIR_SRC=${{ matrix.block-dir-src }} \ - EXECUTION_DATA_DIR=$EXECUTION_DATA_DIR \ - CURRENT_STATE_DIR_SRC=${{ matrix.current-state-dir-src }} \ - MIN_WAIT_TIME=${{ matrix.min-wait-time }} \ - MAX_WAIT_TIME=${{ matrix.max-wait-time }} + run: ./scripts/tests.firewood_chaos.sh "${{ matrix.test || '' }}" + env: + START_BLOCK: ${{ matrix.start-block }} + END_BLOCK: ${{ matrix.end-block }} + BLOCK_DIR_SRC: ${{ matrix.block-dir-src }} + CURRENT_STATE_DIR_SRC: ${{ matrix.current-state-dir-src }} + MIN_WAIT_TIME: ${{ matrix.min-wait-time }} + MAX_WAIT_TIME: ${{ matrix.max-wait-time }} diff --git a/Taskfile.yml b/Taskfile.yml index af14a83c1d54..a7967241f62b 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -238,45 +238,6 @@ tasks: E2E_SERIAL: 1 cmds: - cmd: bash -x ./scripts/tests.e2e.kube.sh --ginkgo.focus-file=xsvm.go {{.CLI_ARGS}} - - test-firewood-chaos: - desc: Chaos test during reexecution test with Firewood - vars: - START_BLOCK: '{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' - BLOCK_DIR: '{{.BLOCK_DIR}}' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' - cmd: go run ./tests/reexecute/chaos --start-block={{.START_BLOCK}} - --end-block={{.END_BLOCK}} --current-state-dir={{.CURRENT_STATE_DIR}} - --block-dir={{.BLOCK_DIR}} --min-wait-time={{.MIN_WAIT_TIME}} - --max-wait-time={{.MAX_WAIT_TIME}} - - test-firewood-chaos-with-copied-data: - desc: Combines import-cchain-reexecute-range and firewood-chaos-test - vars: - START_BLOCK: 
'{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' - cmds: - - task: import-cchain-reexecute-range - vars: - BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' - CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - - task: test-firewood-chaos - vars: - START_BLOCK: '{{.START_BLOCK}}' - END_BLOCK: '{{.END_BLOCK}}' - CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' - BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' - MIN_WAIT_TIME: '{{.MIN_WAIT_TIME}}' - MAX_WAIT_TIME: '{{.MAX_WAIT_TIME}}' # To use a different fuzz time, run `task test-fuzz FUZZTIME=[value in seconds]`. # A value of `-1` will run until it encounters a failing output. diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh new file mode 100755 index 000000000000..42aa6280e0e3 --- /dev/null +++ b/scripts/tests.firewood_chaos.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Firewood Chaos Test +# +# Usage: +# ./tests.firewood_chaos.sh [test-name] +# +# To see available tests: use `help` as the test name or invoke +# without a test name and without required env vars. +# +# Test names configure defaults for S3 sources and block ranges. +# All defaults can be overridden via environment variables. +# +# Environment variables: +# Data sources (provide S3 sources OR local paths): +# BLOCK_DIR_SRC: S3 object key for blocks (triggers S3 import). +# CURRENT_STATE_DIR_SRC: S3 object key for state (triggers S3 import). +# BLOCK_DIR: Path to local block directory. +# CURRENT_STATE_DIR: Path to local current state directory. +# +# Required: +# START_BLOCK: The starting block height (inclusive). +# END_BLOCK: The ending block height (inclusive). 
+# MIN_WAIT_TIME: The minimum amount of time to wait before crashing. +# MAX_WAIT_TIME: The maximum amount of time to wait before crashing. + +show_usage() { + cat <&2 + fi + exit 1 +} + +# Set defaults based on test name (if provided) +TEST_NAME="${1:-}" +if [[ -n "$TEST_NAME" ]]; then + shift + case "$TEST_NAME" in + help) + show_usage + exit 0 + ;; + 101-250k) + BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-100}" + START_BLOCK="${START_BLOCK:-101}" + END_BLOCK="${END_BLOCK:-250000}" + MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" + MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" + ;; + *) + error "Unknown test '$TEST_NAME'" + ;; + esac +fi + +# Determine data source: S3 import or local paths +if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then + # S3 mode - import data + TIMESTAMP=$(date '+%Y%m%d-%H%M%S') + EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR:-/tmp/reexec-${TEST_NAME:-custom}-${TIMESTAMP}}" + + BLOCK_DIR_SRC="${BLOCK_DIR_SRC}" \ + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC}" \ + EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR}" \ + "${SCRIPT_DIR}/import_cchain_data.sh" + + BLOCK_DIR="${EXECUTION_DATA_DIR}/blocks" + CURRENT_STATE_DIR="${EXECUTION_DATA_DIR}/current-state" +elif [[ -n "${BLOCK_DIR_SRC:-}" || -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then + error "Both BLOCK_DIR_SRC and CURRENT_STATE_DIR_SRC must be provided together" +elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then + show_usage + echo "" + echo "Env vars status:" + echo " S3 sources:" + [[ -n "${BLOCK_DIR_SRC:-}" ]] && echo " BLOCK_DIR_SRC: ${BLOCK_DIR_SRC}" || echo " BLOCK_DIR_SRC: (not set)" + [[ -n "${CURRENT_STATE_DIR_SRC:-}" ]] && echo " CURRENT_STATE_DIR_SRC: ${CURRENT_STATE_DIR_SRC}" || echo " CURRENT_STATE_DIR_SRC: (not set)" + echo " Local paths:" + [[ -n "${BLOCK_DIR:-}" ]] && echo " BLOCK_DIR: ${BLOCK_DIR}" || echo " BLOCK_DIR: (not set)" + [[ -n "${CURRENT_STATE_DIR:-}" 
]] && echo " CURRENT_STATE_DIR: ${CURRENT_STATE_DIR}" || echo " CURRENT_STATE_DIR: (not set)" + echo " Block range:" + [[ -n "${START_BLOCK:-}" ]] && echo " START_BLOCK: ${START_BLOCK}" || echo " START_BLOCK: (not set)" + [[ -n "${END_BLOCK:-}" ]] && echo " END_BLOCK: ${END_BLOCK}" || echo " END_BLOCK: (not set)" + echo " Timeouts:" + [[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)" + [[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)" + exit 1 +fi + +# Validate block range +if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then + error "START_BLOCK and END_BLOCK are required" +fi + +echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ===" +echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" +echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}" + +echo "=== Running Chaos Test ===" +go run ./tests/reexecute/chaos \ + --start-block="${START_BLOCK}" \ + --end-block="${END_BLOCK}" \ + --current-state-dir="${CURRENT_STATE_DIR}" \ + --block-dir="${BLOCK_DIR}" \ + --min-wait-time="${MIN_WAIT_TIME}" \ + --max-wait-time="${MAX_WAIT_TIME}" \ No newline at end of file diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index 3edaf6c53433..f61c475edb13 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2025, Ava Labs, Inc. All rights reserved. +// Copyright (C) 2019-2026, Ava Labs, Inc. All rights reserved. // See the file LICENSE for licensing terms. 
package main From 8b17166b053d0fe27e3a6c197307d758ebb7ab6e Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 09:41:24 -0500 Subject: [PATCH 12/24] chore: add archival test --- .github/workflows/chaos-test.json | 10 ++++++++++ .github/workflows/chaos-test.yml | 2 +- Taskfile.yml | 4 ++++ scripts/tests.firewood_chaos.sh | 12 ++++++++++-- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/chaos-test.json b/.github/workflows/chaos-test.json index 7f9917178ad8..8693ce1d6ad2 100644 --- a/.github/workflows/chaos-test.json +++ b/.github/workflows/chaos-test.json @@ -5,6 +5,11 @@ "test": "101-250k", "runner": "ubuntu-latest", "timeout-minutes": 60 + }, + { + "test": "archive-101-250k", + "runner": "ubuntu-latest", + "timeout-minutes": 60 } ] }, @@ -14,6 +19,11 @@ "test": "101-250k", "runner": "ubuntu-latest", "timeout-minutes": 60 + }, + { + "test": "archive-101-250k", + "runner": "ubuntu-latest", + "timeout-minutes": 60 } ] } diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/chaos-test.yml index 75616f1e9a70..5fd23603e11d 100644 --- a/.github/workflows/chaos-test.yml +++ b/.github/workflows/chaos-test.yml @@ -95,7 +95,7 @@ jobs: - uses: actions/checkout@v4 - name: Run chaos test with Firewood shell: nix develop --impure --command bash -x {0} - run: ./scripts/tests.firewood_chaos.sh "${{ matrix.test || '' }}" + run: ./scripts/run_task.sh test-firewood-chaos -- "${{ matrix.test || '' }}" env: START_BLOCK: ${{ matrix.start-block }} END_BLOCK: ${{ matrix.end-block }} diff --git a/Taskfile.yml b/Taskfile.yml index a7967241f62b..3016937555da 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -239,6 +239,10 @@ tasks: cmds: - cmd: bash -x ./scripts/tests.e2e.kube.sh --ginkgo.focus-file=xsvm.go {{.CLI_ARGS}} + test-firewood-chaos: + desc: Runs Firewood chaos test. Run with 'help' to see all available tests. 
+ cmd: ./scripts/tests.firewood_chaos.sh {{.CLI_ARGS}} + # To use a different fuzz time, run `task test-fuzz FUZZTIME=[value in seconds]`. # A value of `-1` will run until it encounters a failing output. diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh index 42aa6280e0e3..f42a419fe0fa 100755 --- a/scripts/tests.firewood_chaos.sh +++ b/scripts/tests.firewood_chaos.sh @@ -33,7 +33,7 @@ Usage: $0 [test-name] Available tests: help - Show this help message 101-250k - Blocks 101-250k with Firewood -EOF + archive-101-250k - Blocks 101-250k with Firewood archive mode } SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -65,6 +65,14 @@ if [[ -n "$TEST_NAME" ]]; then MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" ;; + archive-101-250k) + BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}" + START_BLOCK="${START_BLOCK:-101}" + END_BLOCK="${END_BLOCK:-250000}" + MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" + MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" + ;; *) error "Unknown test '$TEST_NAME'" ;; @@ -75,7 +83,7 @@ fi if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then # S3 mode - import data TIMESTAMP=$(date '+%Y%m%d-%H%M%S') - EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR:-/tmp/reexec-${TEST_NAME:-custom}-${TIMESTAMP}}" + EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR:-/tmp/chaos-test-${TEST_NAME:-custom}-${TIMESTAMP}}" BLOCK_DIR_SRC="${BLOCK_DIR_SRC}" \ CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC}" \ From 8a239e6a7c69a74fb7c723c17c22acd96c243d8f Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 09:44:55 -0500 Subject: [PATCH 13/24] fix: EOF --- scripts/tests.firewood_chaos.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh index f42a419fe0fa..1a922a3d93db 100755 --- a/scripts/tests.firewood_chaos.sh +++ 
b/scripts/tests.firewood_chaos.sh @@ -34,6 +34,7 @@ Available tests: help - Show this help message 101-250k - Blocks 101-250k with Firewood archive-101-250k - Blocks 101-250k with Firewood archive mode +EOF } SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" From c84015dcfcb481e6ff949ada74d2203a20d2a05e Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 10:09:16 -0500 Subject: [PATCH 14/24] fix: config --- scripts/tests.firewood_chaos.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh index 1a922a3d93db..8494c21583fd 100755 --- a/scripts/tests.firewood_chaos.sh +++ b/scripts/tests.firewood_chaos.sh @@ -25,6 +25,9 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). # MIN_WAIT_TIME: The minimum amount of time to wait before crashing. # MAX_WAIT_TIME: The maximum amount of time to wait before crashing. +# +# Optional: +# CONFIG: VM config preset (firewood, firewood-archive). 
show_usage() { cat < Date: Tue, 6 Jan 2026 10:14:41 -0500 Subject: [PATCH 15/24] refactor: remove JSON --- .github/workflows/chaos-test.json | 30 ------------------------------ .github/workflows/chaos-test.yml | 7 +++---- 2 files changed, 3 insertions(+), 34 deletions(-) delete mode 100644 .github/workflows/chaos-test.json diff --git a/.github/workflows/chaos-test.json b/.github/workflows/chaos-test.json deleted file mode 100644 index 8693ce1d6ad2..000000000000 --- a/.github/workflows/chaos-test.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "pull_request": { - "include": [ - { - "test": "101-250k", - "runner": "ubuntu-latest", - "timeout-minutes": 60 - }, - { - "test": "archive-101-250k", - "runner": "ubuntu-latest", - "timeout-minutes": 60 - } - ] - }, - "schedule": { - "include": [ - { - "test": "101-250k", - "runner": "ubuntu-latest", - "timeout-minutes": 60 - }, - { - "test": "archive-101-250k", - "runner": "ubuntu-latest", - "timeout-minutes": 60 - } - ] - } -} diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/chaos-test.yml index 5fd23603e11d..1bbbbc1aaac8 100644 --- a/.github/workflows/chaos-test.yml +++ b/.github/workflows/chaos-test.yml @@ -64,10 +64,9 @@ jobs: echo EOF } >> "$GITHUB_OUTPUT" else - json_string=$(jq -r ".\"${{ github.event_name }}\"" .github/workflows/chaos-test.json) { echo "matrix<> "$GITHUB_OUTPUT" fi @@ -77,8 +76,8 @@ jobs: strategy: fail-fast: false matrix: ${{ fromJSON(needs.define-matrix.outputs.matrix) }} - timeout-minutes: ${{ matrix.timeout-minutes }} - runs-on: ${{ matrix.runner }} + timeout-minutes: ${{ matrix.timeout-minutes || 60 }} + runs-on: ${{ matrix.runner || 'ubuntu-latest' }} permissions: id-token: write contents: read From f1c7cf0be160c2f3f419000d1f81a2d27b65ee73 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 10:39:02 -0500 Subject: [PATCH 16/24] fix: config (again) --- scripts/tests.firewood_chaos.sh | 10 +++---- tests/reexecute/chaos/main.go | 50 ++++++++++++++++++++++++++------- 2 
files changed, 45 insertions(+), 15 deletions(-) diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh index 8494c21583fd..581bf78d5594 100755 --- a/scripts/tests.firewood_chaos.sh +++ b/scripts/tests.firewood_chaos.sh @@ -25,8 +25,6 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). # MIN_WAIT_TIME: The minimum amount of time to wait before crashing. # MAX_WAIT_TIME: The maximum amount of time to wait before crashing. -# -# Optional: # CONFIG: VM config preset (firewood, firewood-archive). show_usage() { @@ -120,12 +118,13 @@ elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then fi # Validate block range -if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then - error "START_BLOCK and END_BLOCK are required" +if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" || -z "${CONFIG:-}" ]]; then + error "START_BLOCK and END_BLOCK and CONFIG are required" fi echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ===" echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" +echo "CONFIG: ${CONFIG}" echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}" echo "=== Running Chaos Test ===" @@ -135,4 +134,5 @@ go run ./tests/reexecute/chaos \ --current-state-dir="${CURRENT_STATE_DIR}" \ --block-dir="${BLOCK_DIR}" \ --min-wait-time="${MIN_WAIT_TIME}" \ - --max-wait-time="${MAX_WAIT_TIME}" \ No newline at end of file + --max-wait-time="${MAX_WAIT_TIME}" \ + --config="${CONFIG}" \ No newline at end of file diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index f61c475edb13..fb6b52e67a6b 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -7,11 +7,14 @@ import ( "context" "flag" "fmt" + "maps" "math/rand" "os" "os/exec" "path/filepath" + "slices" "strconv" + "strings" "syscall" "time" @@ -35,13 +38,23 @@ var ( endBlockArg uint64 minWaitTimeArg time.Duration maxWaitTimeArg time.Duration - - firewoodConfig = `{ - "state-scheme": "firewood", - "snapshot-cache": 0, - 
"pruning-enabled": true, - "state-sync-enabled": false - }` + configNameArg string + configBytesArg []byte + + predefinedConfigs = map[string]string{ + "firewood": `{ + "state-scheme": "firewood", + "snapshot-cache": 0, + "pruning-enabled": true, + "state-sync-enabled": false + }`, + "firewood-archive": `{ + "state-scheme": "firewood", + "snapshot-cache": 0, + "pruning-enabled": false, + "state-sync-enabled": false + }`, + } ) func init() { @@ -54,7 +67,18 @@ func init() { flag.DurationVar(&minWaitTimeArg, "min-wait-time", 20*time.Second, "Minimum amount of time to wait before crashing.") flag.DurationVar(&maxWaitTimeArg, "max-wait-time", 30*time.Second, "Maximum amount of time to wait before crashing.") + predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) + predefinedConfigOptionsStr := fmt.Sprintf("[%s]", strings.Join(predefinedConfigKeys, ", ")) + flag.StringVar(&configNameArg, "config", configNameArg, fmt.Sprintf("Specifies the predefined config to use for the VM. Options include %s.", predefinedConfigOptionsStr)) + flag.Parse() + + predefinedConfigStr, ok := predefinedConfigs[configNameArg] + if !ok { + fmt.Fprintf(os.Stderr, "invalid config name %q. 
Valid options include %s.\n", configNameArg, predefinedConfigOptionsStr) + os.Exit(1) + } + configBytesArg = []byte(predefinedConfigStr) } func main() { @@ -70,6 +94,8 @@ func main() { currentStateDirArg, startBlockArg, endBlockArg, + configNameArg, + configBytesArg, ) } @@ -92,11 +118,13 @@ func run( currentStateDir string, startBlock uint64, endBlock uint64, + configName string, + configBytes []byte, ) { r := require.New(tc) log := tc.Log() - cmd := createReexecutionCmd(blockDir, currentStateDir, startBlock, endBlock) + cmd := createReexecutionCmd(blockDir, currentStateDir, startBlock, endBlock, configName) // Set process group ID so we can kill all child processes cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} @@ -157,7 +185,7 @@ func run( ctx, db, chainDataDir, - []byte(firewoodConfig), + configBytes, metrics.NewPrefixGatherer(), prometheus.NewRegistry(), ) @@ -174,7 +202,7 @@ func run( log.Debug("read VM", zap.Uint64("latest height", lastAcceptedBlock.Height())) - cmd = createReexecutionCmd(blockDir, currentStateDir, lastAcceptedBlock.Height()+1, endBlock) + cmd = createReexecutionCmd(blockDir, currentStateDir, lastAcceptedBlock.Height()+1, endBlock, configName) // 5. 
Restart the reexecution test from the recovered height to verify state consistency r.NoError(cmd.Run()) @@ -209,6 +237,7 @@ func createReexecutionCmd( currentStateDir string, startBlock uint64, endBlock uint64, + configName string, ) *exec.Cmd { cmd := exec.Command("go", "run", @@ -218,6 +247,7 @@ func createReexecutionCmd( "--current-state-dir="+currentStateDir, "--start-block="+strconv.Itoa(int(startBlock)), "--end-block="+strconv.Itoa(int(endBlock)), + "--config="+configName, ) cmd.Stdout = os.Stdout From ef82e6aefe7af5b333efef5b1902dce552ca6e25 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 10:39:51 -0500 Subject: [PATCH 17/24] chore: rename chaos test yml --- .github/workflows/{chaos-test.yml => firewood-chaos-test.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{chaos-test.yml => firewood-chaos-test.yml} (100%) diff --git a/.github/workflows/chaos-test.yml b/.github/workflows/firewood-chaos-test.yml similarity index 100% rename from .github/workflows/chaos-test.yml rename to .github/workflows/firewood-chaos-test.yml From c7931999f7bacd1df121f5c761466bf44c5ec2be Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 11:08:30 -0500 Subject: [PATCH 18/24] refactor!: merge chaos script into reexecution script" --- .github/workflows/firewood-chaos-test.yml | 14 ++- Taskfile.yml | 4 - scripts/benchmark_cchain_range.sh | 117 ++++++++++++++---- scripts/tests.firewood_chaos.sh | 138 ---------------------- 4 files changed, 103 insertions(+), 170 deletions(-) delete mode 100755 scripts/tests.firewood_chaos.sh diff --git a/.github/workflows/firewood-chaos-test.yml b/.github/workflows/firewood-chaos-test.yml index 1bbbbc1aaac8..0c7941015231 100644 --- a/.github/workflows/firewood-chaos-test.yml +++ b/.github/workflows/firewood-chaos-test.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: test: - description: 'Test name to run (e.g., 101-250k). Leave empty to use custom inputs below.' 
+ description: 'Test name to run (e.g., chaos-101-250k). Leave empty to use custom inputs below.' default: '' # Custom inputs (used when test is not provided) start-block: @@ -20,6 +20,9 @@ on: description: 'The current state directory. Supports S3 directory/zip and local directories.' default: '' # Chaos test specific inputs + config: + description: 'VM config preset (firewood, firewood-archive). Required for custom tests.' + default: 'firewood' min-wait-time: description: 'Minimum wait time before killing the process (e.g., 120s, 2m).' default: '120s' @@ -51,12 +54,13 @@ jobs: if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then { echo "matrix<> "$GITHUB_OUTPUT" fi @@ -94,12 +98,14 @@ jobs: - uses: actions/checkout@v4 - name: Run chaos test with Firewood shell: nix develop --impure --command bash -x {0} - run: ./scripts/run_task.sh test-firewood-chaos -- "${{ matrix.test || '' }}" + run: ./scripts/run_task.sh test-cchain-reexecution -- "${{ matrix.test || '' }}" env: + CHAOS_MODE: 'true' START_BLOCK: ${{ matrix.start-block }} END_BLOCK: ${{ matrix.end-block }} BLOCK_DIR_SRC: ${{ matrix.block-dir-src }} CURRENT_STATE_DIR_SRC: ${{ matrix.current-state-dir-src }} + CONFIG: ${{ matrix.config }} MIN_WAIT_TIME: ${{ matrix.min-wait-time }} MAX_WAIT_TIME: ${{ matrix.max-wait-time }} diff --git a/Taskfile.yml b/Taskfile.yml index 3016937555da..a7967241f62b 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -239,10 +239,6 @@ tasks: cmds: - cmd: bash -x ./scripts/tests.e2e.kube.sh --ginkgo.focus-file=xsvm.go {{.CLI_ARGS}} - test-firewood-chaos: - desc: Runs Firewood chaos test. Run with 'help' to see all available tests. - cmd: ./scripts/tests.firewood_chaos.sh {{.CLI_ARGS}} - # To use a different fuzz time, run `task test-fuzz FUZZTIME=[value in seconds]`. # A value of `-1` will run until it encounters a failing output. 
diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index df37f8a01fb5..1786a4d2929e 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -2,11 +2,14 @@ set -euo pipefail -# C-Chain Re-execution Benchmark Script +# C-Chain Re-execution Benchmark and Chaos Test Script # # Usage: # ./benchmark_cchain_range.sh [test-name] # +# Test names starting with "chaos-" run crash tests. +# All other test names run reexecution tests. +# # To see available tests: use `help` as the test name or invoke # without a test name and without required env vars. # @@ -24,14 +27,20 @@ set -euo pipefail # START_BLOCK: The starting block height (inclusive). # END_BLOCK: The ending block height (inclusive). # -# Optional: -# CONFIG: VM config preset (default, archive, firewood). +# Optional (reexecution tests): +# CONFIG: VM config preset (default, archive, firewood, firewood-archive). # LABELS: Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE: If set, benchmark output is also written to this file. # METRICS_SERVER_ENABLED: If set, enables the metrics server. # METRICS_SERVER_PORT: If set, determines the port the metrics server will listen to. # METRICS_COLLECTOR_ENABLED: If set, enables the metrics collector. # PUSH_POST_STATE: S3 destination to push current-state after execution. +# +# Required (chaos tests): +# CHAOS_MODE: Set to "true" to run chaos test with custom parameters. +# CONFIG: VM config preset (firewood, firewood-archive). +# MIN_WAIT_TIME: Minimum wait before crash (e.g., 120s). +# MAX_WAIT_TIME: Maximum wait before crash (e.g., 150s). 
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -51,6 +60,8 @@ Usage: $0 [test-name] Available tests: help - Show this help message + + Reexecution tests: default - Quick test run (blocks 101-200, hashdb) hashdb-101-250k - Blocks 101-250k with hashdb hashdb-archive-101-250k - Blocks 101-250k with hashdb archive @@ -58,6 +69,10 @@ Available tests: firewood-101-250k - Blocks 101-250k with firewood firewood-33m-33m500k - Blocks 33m-33.5m with firewood firewood-33m-40m - Blocks 33m-40m with firewood + + Chaos tests: + chaos-101-250k - Blocks 101-250k with Firewood chaos test + chaos-archive-101-250k - Blocks 101-250k with Firewood archive chaos test EOF } @@ -116,12 +131,36 @@ if [[ -n "$TEST_NAME" ]]; then END_BLOCK="${END_BLOCK:-40000000}" CONFIG="${CONFIG:-firewood}" ;; + chaos-101-250k) + BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-100}" + START_BLOCK="${START_BLOCK:-101}" + END_BLOCK="${END_BLOCK:-250000}" + MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" + MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" + CONFIG="${CONFIG:-firewood}" + ;; + chaos-archive-101-250k) + BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}" + START_BLOCK="${START_BLOCK:-101}" + END_BLOCK="${END_BLOCK:-250000}" + MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" + MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" + CONFIG="${CONFIG:-firewood-archive}" + ;; *) error "Unknown test '$TEST_NAME'" ;; esac fi +# Detect if this is a chaos test +IS_CHAOS_TEST=false +if [[ "${TEST_NAME:-}" == chaos-* ]] || [[ "${CHAOS_MODE:-}" == "true" ]]; then + IS_CHAOS_TEST=true +fi + # Determine data source: S3 import or local paths if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then # S3 mode - import data @@ -150,6 +189,9 @@ elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then echo " Block 
range:" [[ -n "${START_BLOCK:-}" ]] && echo " START_BLOCK: ${START_BLOCK}" || echo " START_BLOCK: (not set)" [[ -n "${END_BLOCK:-}" ]] && echo " END_BLOCK: ${END_BLOCK}" || echo " END_BLOCK: (not set)" + echo " Timeouts (chaos tests):" + [[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)" + [[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)" exit 1 fi @@ -158,25 +200,52 @@ if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then error "START_BLOCK and END_BLOCK are required" fi -echo "=== C-Chain Re-execution: ${TEST_NAME:-custom} ===" -echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" -echo "Config: ${CONFIG:-default}" - -echo "=== Running re-execution ===" -go run github.com/ava-labs/avalanchego/tests/reexecute/c \ - --block-dir="${BLOCK_DIR}" \ - --current-state-dir="${CURRENT_STATE_DIR}" \ - ${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \ - ${CONFIG:+--config="${CONFIG}"} \ - --start-block="${START_BLOCK}" \ - --end-block="${END_BLOCK}" \ - ${LABELS:+--labels="${LABELS}"} \ - ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \ - ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \ - ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \ - ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"} - -if [[ -n "${PUSH_POST_STATE:-}" ]]; then - echo "=== Pushing post-state to S3 ===" - "${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}" +# Chaos tests require additional validation +if [[ "$IS_CHAOS_TEST" == "true" ]]; then + if [[ -z "${CONFIG:-}" ]]; then + error "CONFIG is required for chaos tests" + fi + if [[ -z "${MIN_WAIT_TIME:-}" || -z "${MAX_WAIT_TIME:-}" ]]; then + error "MIN_WAIT_TIME and MAX_WAIT_TIME are required for chaos tests" + fi +fi + +if [[ "$IS_CHAOS_TEST" == "true" ]]; then + echo "=== Firewood Chaos Test: 
${TEST_NAME:-custom} ===" + echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" + echo "CONFIG: ${CONFIG}" + echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}" + + echo "=== Running Chaos Test ===" + go run ./tests/reexecute/chaos \ + --start-block="${START_BLOCK}" \ + --end-block="${END_BLOCK}" \ + --current-state-dir="${CURRENT_STATE_DIR}" \ + --block-dir="${BLOCK_DIR}" \ + --min-wait-time="${MIN_WAIT_TIME}" \ + --max-wait-time="${MAX_WAIT_TIME}" \ + --config="${CONFIG}" +else + echo "=== C-Chain Re-execution: ${TEST_NAME:-custom} ===" + echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" + echo "Config: ${CONFIG:-default}" + + echo "=== Running re-execution ===" + go run github.com/ava-labs/avalanchego/tests/reexecute/c \ + --block-dir="${BLOCK_DIR}" \ + --current-state-dir="${CURRENT_STATE_DIR}" \ + ${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \ + ${CONFIG:+--config="${CONFIG}"} \ + --start-block="${START_BLOCK}" \ + --end-block="${END_BLOCK}" \ + ${LABELS:+--labels="${LABELS}"} \ + ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \ + ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \ + ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \ + ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"} + + if [[ -n "${PUSH_POST_STATE:-}" ]]; then + echo "=== Pushing post-state to S3 ===" + "${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}" + fi fi diff --git a/scripts/tests.firewood_chaos.sh b/scripts/tests.firewood_chaos.sh deleted file mode 100755 index 581bf78d5594..000000000000 --- a/scripts/tests.firewood_chaos.sh +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -# Firewood Chaos Test -# -# Usage: -# ./tests.firewood_chaos.sh [test-name] -# -# To see available tests: use `help` as the test name or invoke -# without a test name and without required env vars. 
-# -# Test names configure defaults for S3 sources and block ranges. -# All defaults can be overridden via environment variables. -# -# Environment variables: -# Data sources (provide S3 sources OR local paths): -# BLOCK_DIR_SRC: S3 object key for blocks (triggers S3 import). -# CURRENT_STATE_DIR_SRC: S3 object key for state (triggers S3 import). -# BLOCK_DIR: Path to local block directory. -# CURRENT_STATE_DIR: Path to local current state directory. -# -# Required: -# START_BLOCK: The starting block height (inclusive). -# END_BLOCK: The ending block height (inclusive). -# MIN_WAIT_TIME: The minimum amount of time to wait before crashing. -# MAX_WAIT_TIME: The maximum amount of time to wait before crashing. -# CONFIG: VM config preset (firewood, firewood-archive). - -show_usage() { - cat <&2 - fi - exit 1 -} - -# Set defaults based on test name (if provided) -TEST_NAME="${1:-}" -if [[ -n "$TEST_NAME" ]]; then - shift - case "$TEST_NAME" in - help) - show_usage - exit 0 - ;; - 101-250k) - BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" - CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-100}" - START_BLOCK="${START_BLOCK:-101}" - END_BLOCK="${END_BLOCK:-250000}" - MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" - MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" - CONFIG="${CONFIG:-firewood}" - ;; - archive-101-250k) - BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" - CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}" - START_BLOCK="${START_BLOCK:-101}" - END_BLOCK="${END_BLOCK:-250000}" - MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" - MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" - CONFIG="${CONFIG:-firewood-archive}" - ;; - *) - error "Unknown test '$TEST_NAME'" - ;; - esac -fi - -# Determine data source: S3 import or local paths -if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then - # S3 mode - import data - TIMESTAMP=$(date '+%Y%m%d-%H%M%S') - 
EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR:-/tmp/chaos-test-${TEST_NAME:-custom}-${TIMESTAMP}}" - - BLOCK_DIR_SRC="${BLOCK_DIR_SRC}" \ - CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC}" \ - EXECUTION_DATA_DIR="${EXECUTION_DATA_DIR}" \ - "${SCRIPT_DIR}/import_cchain_data.sh" - - BLOCK_DIR="${EXECUTION_DATA_DIR}/blocks" - CURRENT_STATE_DIR="${EXECUTION_DATA_DIR}/current-state" -elif [[ -n "${BLOCK_DIR_SRC:-}" || -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then - error "Both BLOCK_DIR_SRC and CURRENT_STATE_DIR_SRC must be provided together" -elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then - show_usage - echo "" - echo "Env vars status:" - echo " S3 sources:" - [[ -n "${BLOCK_DIR_SRC:-}" ]] && echo " BLOCK_DIR_SRC: ${BLOCK_DIR_SRC}" || echo " BLOCK_DIR_SRC: (not set)" - [[ -n "${CURRENT_STATE_DIR_SRC:-}" ]] && echo " CURRENT_STATE_DIR_SRC: ${CURRENT_STATE_DIR_SRC}" || echo " CURRENT_STATE_DIR_SRC: (not set)" - echo " Local paths:" - [[ -n "${BLOCK_DIR:-}" ]] && echo " BLOCK_DIR: ${BLOCK_DIR}" || echo " BLOCK_DIR: (not set)" - [[ -n "${CURRENT_STATE_DIR:-}" ]] && echo " CURRENT_STATE_DIR: ${CURRENT_STATE_DIR}" || echo " CURRENT_STATE_DIR: (not set)" - echo " Block range:" - [[ -n "${START_BLOCK:-}" ]] && echo " START_BLOCK: ${START_BLOCK}" || echo " START_BLOCK: (not set)" - [[ -n "${END_BLOCK:-}" ]] && echo " END_BLOCK: ${END_BLOCK}" || echo " END_BLOCK: (not set)" - echo " Timeouts:" - [[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)" - [[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)" - exit 1 -fi - -# Validate block range -if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" || -z "${CONFIG:-}" ]]; then - error "START_BLOCK and END_BLOCK and CONFIG are required" -fi - -echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ===" -echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" -echo "CONFIG: ${CONFIG}" -echo "Crashing between ${MIN_WAIT_TIME} and 
${MAX_WAIT_TIME}" - -echo "=== Running Chaos Test ===" -go run ./tests/reexecute/chaos \ - --start-block="${START_BLOCK}" \ - --end-block="${END_BLOCK}" \ - --current-state-dir="${CURRENT_STATE_DIR}" \ - --block-dir="${BLOCK_DIR}" \ - --min-wait-time="${MIN_WAIT_TIME}" \ - --max-wait-time="${MAX_WAIT_TIME}" \ - --config="${CONFIG}" \ No newline at end of file From b472805be58741dbe1b6bb7d33bbcb86ae3f764b Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 11:17:22 -0500 Subject: [PATCH 19/24] chore: nits --- scripts/benchmark_cchain_range.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 1786a4d2929e..7a5ba4c1c254 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -71,8 +71,8 @@ Available tests: firewood-33m-40m - Blocks 33m-40m with firewood Chaos tests: - chaos-101-250k - Blocks 101-250k with Firewood chaos test - chaos-archive-101-250k - Blocks 101-250k with Firewood archive chaos test + chaos-101-250k - Blocks 101-250k with Firewood + chaos-archive-101-250k - Blocks 101-250k with Firewood archive EOF } From 219b24a3e837b99e7e5c13ea230b1a0a3d469401 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 12:12:40 -0500 Subject: [PATCH 20/24] refactor!: address PR comments --- .github/workflows/firewood-chaos-test.yml | 6 +- scripts/benchmark_cchain_range.sh | 109 +++++++++------------- 2 files changed, 45 insertions(+), 70 deletions(-) diff --git a/.github/workflows/firewood-chaos-test.yml b/.github/workflows/firewood-chaos-test.yml index 0c7941015231..5b2e615512bd 100644 --- a/.github/workflows/firewood-chaos-test.yml +++ b/.github/workflows/firewood-chaos-test.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: test: - description: 'Test name to run (e.g., chaos-101-250k). Leave empty to use custom inputs below.' + description: 'Test name to run (e.g., firewood-101-250k). 
Leave empty to use custom inputs below.' default: '' # Custom inputs (used when test is not provided) start-block: @@ -70,7 +70,7 @@ jobs: else { echo "matrix<> "$GITHUB_OUTPUT" fi @@ -100,7 +100,7 @@ jobs: shell: nix develop --impure --command bash -x {0} run: ./scripts/run_task.sh test-cchain-reexecution -- "${{ matrix.test || '' }}" env: - CHAOS_MODE: 'true' + CHAOS_MODE: '1' START_BLOCK: ${{ matrix.start-block }} END_BLOCK: ${{ matrix.end-block }} BLOCK_DIR_SRC: ${{ matrix.block-dir-src }} diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 7a5ba4c1c254..d4f45c54f474 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -7,9 +7,6 @@ set -euo pipefail # Usage: # ./benchmark_cchain_range.sh [test-name] # -# Test names starting with "chaos-" run crash tests. -# All other test names run reexecution tests. -# # To see available tests: use `help` as the test name or invoke # without a test name and without required env vars. # @@ -37,7 +34,7 @@ set -euo pipefail # PUSH_POST_STATE: S3 destination to push current-state after execution. # # Required (chaos tests): -# CHAOS_MODE: Set to "true" to run chaos test with custom parameters. +# CHAOS_MODE: Set to enable chaos test mode (e.g., CHAOS_MODE=1). # CONFIG: VM config preset (firewood, firewood-archive). # MIN_WAIT_TIME: Minimum wait before crash (e.g., 120s). # MAX_WAIT_TIME: Maximum wait before crash (e.g., 150s). 
@@ -61,18 +58,14 @@ Usage: $0 [test-name] Available tests: help - Show this help message - Reexecution tests: default - Quick test run (blocks 101-200, hashdb) hashdb-101-250k - Blocks 101-250k with hashdb hashdb-archive-101-250k - Blocks 101-250k with hashdb archive hashdb-33m-33m500k - Blocks 33m-33.5m with hashdb firewood-101-250k - Blocks 101-250k with firewood + firewood-archive-101-250k - Blocks 101-250k with firewood archive firewood-33m-33m500k - Blocks 33m-33.5m with firewood firewood-33m-40m - Blocks 33m-40m with firewood - - Chaos tests: - chaos-101-250k - Blocks 101-250k with Firewood - chaos-archive-101-250k - Blocks 101-250k with Firewood archive EOF } @@ -117,6 +110,13 @@ if [[ -n "$TEST_NAME" ]]; then END_BLOCK="${END_BLOCK:-250000}" CONFIG="${CONFIG:-firewood}" ;; + firewood-archive-101-250k) + BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" + CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}" + START_BLOCK="${START_BLOCK:-101}" + END_BLOCK="${END_BLOCK:-250000}" + CONFIG="${CONFIG:-firewood-archive}" + ;; firewood-33m-33m500k) BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-30m-40m-ldb}" CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-33m}" @@ -131,36 +131,12 @@ if [[ -n "$TEST_NAME" ]]; then END_BLOCK="${END_BLOCK:-40000000}" CONFIG="${CONFIG:-firewood}" ;; - chaos-101-250k) - BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" - CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-100}" - START_BLOCK="${START_BLOCK:-101}" - END_BLOCK="${END_BLOCK:-250000}" - MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" - MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" - CONFIG="${CONFIG:-firewood}" - ;; - chaos-archive-101-250k) - BLOCK_DIR_SRC="${BLOCK_DIR_SRC:-cchain-mainnet-blocks-1m-ldb}" - CURRENT_STATE_DIR_SRC="${CURRENT_STATE_DIR_SRC:-cchain-current-state-firewood-archive-100}" - START_BLOCK="${START_BLOCK:-101}" - 
END_BLOCK="${END_BLOCK:-250000}" - MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" - MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" - CONFIG="${CONFIG:-firewood-archive}" - ;; *) error "Unknown test '$TEST_NAME'" ;; esac fi -# Detect if this is a chaos test -IS_CHAOS_TEST=false -if [[ "${TEST_NAME:-}" == chaos-* ]] || [[ "${CHAOS_MODE:-}" == "true" ]]; then - IS_CHAOS_TEST=true -fi - # Determine data source: S3 import or local paths if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then # S3 mode - import data @@ -189,9 +165,11 @@ elif [[ -z "${BLOCK_DIR:-}" || -z "${CURRENT_STATE_DIR:-}" ]]; then echo " Block range:" [[ -n "${START_BLOCK:-}" ]] && echo " START_BLOCK: ${START_BLOCK}" || echo " START_BLOCK: (not set)" [[ -n "${END_BLOCK:-}" ]] && echo " END_BLOCK: ${END_BLOCK}" || echo " END_BLOCK: (not set)" - echo " Timeouts (chaos tests):" - [[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)" - [[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)" + if [[ -n "${CHAOS_MODE:-}" ]]; then + echo " Timeouts (chaos tests):" + [[ -n "${MIN_WAIT_TIME:-}" ]] && echo " MIN_WAIT_TIME: ${MIN_WAIT_TIME}" || echo " MIN_WAIT_TIME: (not set)" + [[ -n "${MAX_WAIT_TIME:-}" ]] && echo " MAX_WAIT_TIME: ${MAX_WAIT_TIME}" || echo " MAX_WAIT_TIME: (not set)" + fi exit 1 fi @@ -201,22 +179,24 @@ if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then fi # Chaos tests require additional validation -if [[ "$IS_CHAOS_TEST" == "true" ]]; then - if [[ -z "${CONFIG:-}" ]]; then - error "CONFIG is required for chaos tests" - fi - if [[ -z "${MIN_WAIT_TIME:-}" || -z "${MAX_WAIT_TIME:-}" ]]; then - error "MIN_WAIT_TIME and MAX_WAIT_TIME are required for chaos tests" +if [[ -n "${CHAOS_MODE:-}" ]]; then + if [[ -z "${MIN_WAIT_TIME:-}" || -z "${MAX_WAIT_TIME:-}" || -z "${CONFIG:-}" ]]; then + error "MIN_WAIT_TIME and MAX_WAIT_TIME and CONFIG are required for chaos tests" fi fi -if 
[[ "$IS_CHAOS_TEST" == "true" ]]; then +if [[ -n "${CHAOS_MODE:-}" ]]; then echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ===" - echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" - echo "CONFIG: ${CONFIG}" echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}" +else + echo "=== C-Chain Re-execution Test: ${TEST_NAME:-custom} ===" +fi + +echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" +echo "CONFIG: ${CONFIG:-default}" - echo "=== Running Chaos Test ===" +echo "=== Running Test ===" +if [[ -n "${CHAOS_MODE:-}" ]]; then go run ./tests/reexecute/chaos \ --start-block="${START_BLOCK}" \ --end-block="${END_BLOCK}" \ @@ -226,26 +206,21 @@ if [[ "$IS_CHAOS_TEST" == "true" ]]; then --max-wait-time="${MAX_WAIT_TIME}" \ --config="${CONFIG}" else - echo "=== C-Chain Re-execution: ${TEST_NAME:-custom} ===" - echo "Blocks: ${START_BLOCK} - ${END_BLOCK}" - echo "Config: ${CONFIG:-default}" - - echo "=== Running re-execution ===" go run github.com/ava-labs/avalanchego/tests/reexecute/c \ - --block-dir="${BLOCK_DIR}" \ - --current-state-dir="${CURRENT_STATE_DIR}" \ - ${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \ - ${CONFIG:+--config="${CONFIG}"} \ - --start-block="${START_BLOCK}" \ - --end-block="${END_BLOCK}" \ - ${LABELS:+--labels="${LABELS}"} \ - ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \ - ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \ - ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \ - ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"} - - if [[ -n "${PUSH_POST_STATE:-}" ]]; then - echo "=== Pushing post-state to S3 ===" - "${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}" - fi + --block-dir="${BLOCK_DIR}" \ + --current-state-dir="${CURRENT_STATE_DIR}" \ + ${RUNNER_TYPE:+--runner="${RUNNER_TYPE}"} \ + ${CONFIG:+--config="${CONFIG}"} \ + --start-block="${START_BLOCK}" \ + --end-block="${END_BLOCK}" \ + 
${LABELS:+--labels="${LABELS}"} \ + ${BENCHMARK_OUTPUT_FILE:+--benchmark-output-file="${BENCHMARK_OUTPUT_FILE}"} \ + ${METRICS_SERVER_ENABLED:+--metrics-server-enabled="${METRICS_SERVER_ENABLED}"} \ + ${METRICS_SERVER_PORT:+--metrics-server-port="${METRICS_SERVER_PORT}"} \ + ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled="${METRICS_COLLECTOR_ENABLED}"} + + if [[ -n "${PUSH_POST_STATE:-}" ]]; then + echo "=== Pushing post-state to S3 ===" + "${SCRIPT_DIR}/copy_dir.sh" "${CURRENT_STATE_DIR}/" "${PUSH_POST_STATE}" + fi fi From 59581a8588c966eca26e5ed88fd43023e00da018 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 12:23:12 -0500 Subject: [PATCH 21/24] chore: defined tests set chaos defaults --- scripts/benchmark_cchain_range.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index d4f45c54f474..98877fa98270 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -11,6 +11,8 @@ set -euo pipefail # without a test name and without required env vars. # # Test names configure defaults for S3 sources and block ranges. +# If running in chaos mode, test names also configure defaults for the VM Config +# and min/max wait times. # All defaults can be overridden via environment variables. 
# # Environment variables: @@ -137,6 +139,12 @@ if [[ -n "$TEST_NAME" ]]; then esac fi +# Set chaos test defaults when using a defined test with CHAOS_MODE +if [[ -n "${CHAOS_MODE:-}" && -n "${TEST_NAME:-}" ]]; then + MIN_WAIT_TIME="${MIN_WAIT_TIME:-120s}" + MAX_WAIT_TIME="${MAX_WAIT_TIME:-150s}" +fi + # Determine data source: S3 import or local paths if [[ -n "${BLOCK_DIR_SRC:-}" && -n "${CURRENT_STATE_DIR_SRC:-}" ]]; then # S3 mode - import data From c29a101a1d63e66b384d91d3fa267550d9d9c684 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 13:33:32 -0500 Subject: [PATCH 22/24] refactor: simplify chaos test validation conditional --- scripts/benchmark_cchain_range.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 98877fa98270..e8524eae89f5 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -186,14 +186,12 @@ if [[ -z "${START_BLOCK:-}" || -z "${END_BLOCK:-}" ]]; then error "START_BLOCK and END_BLOCK are required" fi -# Chaos tests require additional validation if [[ -n "${CHAOS_MODE:-}" ]]; then + # Chaos tests require additional validation if [[ -z "${MIN_WAIT_TIME:-}" || -z "${MAX_WAIT_TIME:-}" || -z "${CONFIG:-}" ]]; then error "MIN_WAIT_TIME and MAX_WAIT_TIME and CONFIG are required for chaos tests" fi -fi -if [[ -n "${CHAOS_MODE:-}" ]]; then echo "=== Firewood Chaos Test: ${TEST_NAME:-custom} ===" echo "Crashing between ${MIN_WAIT_TIME} and ${MAX_WAIT_TIME}" else From ada0690ac765cb9445a72e837fa288c59c32ca82 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 13:56:53 -0500 Subject: [PATCH 23/24] doc: make clear that chaos test works only with Firewood --- scripts/benchmark_cchain_range.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index e8524eae89f5..1ac92cee635a 100755 --- 
a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -15,6 +15,8 @@ set -euo pipefail # and min/max wait times. # All defaults can be overridden via environment variables. # +# Note: chaos tests can only be run with firewood VM configs. +# # Environment variables: # Data sources (provide S3 sources OR local paths): # BLOCK_DIR_SRC: S3 object key for blocks (triggers S3 import). @@ -37,7 +39,7 @@ set -euo pipefail # # Required (chaos tests): # CHAOS_MODE: Set to enable chaos test mode (e.g., CHAOS_MODE=1). -# CONFIG: VM config preset (firewood, firewood-archive). +# CONFIG: VM config preset (firewood or firewood-archive only). # MIN_WAIT_TIME: Minimum wait before crash (e.g., 120s). # MAX_WAIT_TIME: Maximum wait before crash (e.g., 150s). From 4740b85c12ef8479532b81d0799616954efcbb19 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 6 Jan 2026 14:08:33 -0500 Subject: [PATCH 24/24] chore: license --- tests/reexecute/chaos/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reexecute/chaos/main.go b/tests/reexecute/chaos/main.go index fb6b52e67a6b..5ed575563664 100644 --- a/tests/reexecute/chaos/main.go +++ b/tests/reexecute/chaos/main.go @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2026, Ava Labs, Inc. All rights reserved. +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. // See the file LICENSE for licensing terms. package main