From 0178776293cefe4ce7efb08119d92ae5e134d188 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Sun, 12 Oct 2025 17:03:41 -0400 Subject: [PATCH 01/26] feat(reexecution/c): decouple metrics server and collector --- .../c-chain-reexecution-benchmark/action.yml | 2 +- Taskfile.yml | 8 +- scripts/benchmark_cchain_range.sh | 4 +- tests/reexecute/c/README.md | 15 +++- tests/reexecute/c/vm_reexecute_test.go | 87 ++++++++++++++++--- 5 files changed, 94 insertions(+), 22 deletions(-) diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index d0f119a02c80..523f28d63e79 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -95,7 +95,7 @@ runs: LABELS=${{ env.LABELS }} \ BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_ENABLED=true + METRICS_MODE="full" prometheus_url: ${{ inputs.prometheus-url }} prometheus_push_url: ${{ inputs.prometheus-push-url }} prometheus_username: ${{ inputs.prometheus-username }} diff --git a/Taskfile.yml b/Taskfile.yml index 7f722c814bba..323127e15417 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -203,7 +203,7 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_ENABLED: '{{.METRICS_ENABLED | default "false"}}' + METRICS_MODE: '{{.METRICS_MODE | default "disabled"}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ @@ -213,7 +213,7 @@ tasks: END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ - METRICS_ENABLED={{.METRICS_ENABLED}} \ + METRICS_MODE={{.METRICS_MODE}} \ bash -x ./scripts/benchmark_cchain_range.sh reexecute-cchain-range-with-copied-data: @@ -228,7 +228,7 @@ tasks: END_BLOCK: '{{.END_BLOCK | default "250000"}}' LABELS: '{{.LABELS | default ""}}' 
BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_ENABLED: '{{.METRICS_ENABLED | default "false"}}' + METRICS_MODE: '{{.METRICS_MODE | default "disabled"}}' cmds: - task: import-cchain-reexecute-range vars: @@ -245,7 +245,7 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' - METRICS_ENABLED: '{{.METRICS_ENABLED}}' + METRICS_MODE: '{{.METRICS_MODE}}' test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 3072143da87a..9765eedbb610 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -25,10 +25,10 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ ${LABELS:+--labels=\"${LABELS}\"} \ - ${METRICS_ENABLED:+--metrics-enabled=\"${METRICS_ENABLED}\"}" + ${METRICS_MODE:+--metrics-mode=\"${METRICS_MODE}\"}" if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}" else eval "$cmd" -fi \ No newline at end of file +fi diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index f0f8fe6aa670..6cf4e34b42ac 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -42,7 +42,18 @@ export AWS_REGION=us-east-2 ### Metrics Collection -If running with metrics collection, enabled in CI and configured locally with `METRICS_ENABLED=true`, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. +If running locally, there are three options for metrics collection: + +- `METRICS_MODE=disabled`: no metrics are available. +- `METRICS_MODE=server-only`: starts a Prometheus server exporting VM metrics. A + link to the metrics endpoint is logged during execution. 
+- `METRICS_MODE=full`: starts both a Prometheus server exporting VM metrics and + a Prometheus collector. A link to the corresponding Grafana dashboard is + logged during execution. + +When utilizing the `full` options, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. + +Running the re-execution test in CI will always set `METRICS_MODE=full`. ## Quick Start @@ -230,7 +241,7 @@ The `CONFIG` parameter currently only supports pre-defined configs and not passi The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/. -To export metrics for a local run, simply set the Taskfile variable `METRICS_ENABLED=true` either via environment variable or passing it at the command line. +To export metrics for a local run, simply set the Taskfile variable `METRICS_MODE=full` either via environment variable or passing it at the command line. You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s). 
diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 9982da655e3d..1eaec7ea6ad0 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -48,6 +48,12 @@ import ( "github.com/ava-labs/avalanchego/vms/platformvm/warp" ) +const ( + MetricsDisabled metricsMode = iota + MetricsServerOnly + MetricsFull +) + var ( mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") @@ -62,10 +68,11 @@ var ( startBlockArg uint64 endBlockArg uint64 chanSizeArg int - metricsEnabledArg bool executionTimeout time.Duration labelsArg string + metricsModeArg = MetricsDisabled + networkUUID string = uuid.NewString() labels = map[string]string{ "job": "c-chain-reexecution", @@ -94,6 +101,41 @@ var ( configBytesArg []byte ) +type metricsMode int + +func (m *metricsMode) Set(s string) error { + s = strings.ToLower(strings.TrimSpace(s)) + + switch s { + case "disabled": + *m = MetricsDisabled + case "server-only": + *m = MetricsServerOnly + case "full": + *m = MetricsFull + default: + return fmt.Errorf("invalid metrics mode: %s (valid options: disabled, server-only, full)", s) + } + return nil +} + +func (m metricsMode) String() string { + switch m { + case MetricsDisabled: + return "disabled" + case MetricsServerOnly: + return "server-only" + case MetricsFull: + return "full" + default: + return "unknown" + } +} + +func (m metricsMode) shouldStartServer() bool { return m >= MetricsServerOnly } + +func (m metricsMode) shouldStartCollector() bool { return m == MetricsFull } + func TestMain(m *testing.M) { evm.RegisterAllLibEVMExtras() @@ -104,7 +146,7 @@ func TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. 
After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.") - flag.BoolVar(&metricsEnabledArg, "metrics-enabled", false, "Enable metrics collection.") + flag.Var(&metricsModeArg, "metrics-mode", "Metrics mode: disabled (no metrics), server-only (creates Prometheus server), or full (creates Prometheus server and starts Prometheus collector)") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) @@ -151,7 +193,7 @@ func BenchmarkReexecuteRange(b *testing.B) { startBlockArg, endBlockArg, chanSizeArg, - metricsEnabledArg, + metricsModeArg, ) }) } @@ -164,7 +206,7 @@ func benchmarkReexecuteRange( startBlock uint64, endBlock uint64, chanSize int, - metricsEnabled bool, + metricsMode metricsMode, ) { r := require.New(b) ctx := context.Background() @@ -185,9 +227,8 @@ func benchmarkReexecuteRange( r.NoError(prefixGatherer.Register("avalanche_snowman", consensusRegistry)) log := tests.NewDefaultLogger("c-chain-reexecution") - - if metricsEnabled { - collectRegistry(b, log, "c-chain-reexecution", prefixGatherer, labels) + if metricsMode.shouldStartServer() { + collectRegistry(b, log, "c-chain-reexecution", prefixGatherer, labels, metricsMode.shouldStartCollector()) } var ( @@ -554,20 +595,40 @@ func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, err return m, nil } -// collectRegistry starts prometheus and collects metrics from the provided gatherer. -// Attaches the provided labels + GitHub labels if available to the collected metrics. -func collectRegistry(tb testing.TB, log logging.Logger, name string, gatherer prometheus.Gatherer, labels map[string]string) { +// collectRegistry starts a Prometheus server for the provided gatherer. 
If +// startCollector is true, then collectRegistry also starts a Prometheus +// collector for the provided gatherer and attaches the provided labels + GitHub +// labels if available to the collected metrics. +func collectRegistry( + tb testing.TB, + log logging.Logger, + name string, + gatherer prometheus.Gatherer, + labels map[string]string, + startCollector bool, +) { r := require.New(tb) + server, err := tests.NewPrometheusServer(gatherer) + r.NoError(err) + + if !startCollector { + log.Info("metrics endpoint available", + zap.String("url", fmt.Sprintf("http://%s/ext/metrics", server.Address())), + ) + + tb.Cleanup(func() { + r.NoError(server.Stop()) + }) + return + } + startPromCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout) defer cancel() logger := tests.NewDefaultLogger("prometheus") r.NoError(tmpnet.StartPrometheus(startPromCtx, logger)) - server, err := tests.NewPrometheusServer(gatherer) - r.NoError(err) - var sdConfigFilePath string tb.Cleanup(func() { // Ensure a final metrics scrape. From 848c6ade0d79f658248d600277d2ab3d1ff6b276 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 08:56:53 -0400 Subject: [PATCH 02/26] docs: improve collectRegistry --- tests/reexecute/c/vm_reexecute_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 1eaec7ea6ad0..e1c9b4403f58 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -596,8 +596,8 @@ func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, err } // collectRegistry starts a Prometheus server for the provided gatherer. 
If -// startCollector is true, then collectRegistry also starts a Prometheus -// collector for the provided gatherer and attaches the provided labels + GitHub +// startCollector is true, it also starts a Prometheus collector configured to +// scrape the Prometheus server and attaches the provided labels + GitHub // labels if available to the collected metrics. func collectRegistry( tb testing.TB, From be761ac79dee396423591a4f2c8736ca93c1a9ed Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 12:41:59 -0400 Subject: [PATCH 03/26] chore: set default to empty string --- Taskfile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 323127e15417..4b38da7f6df7 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -203,7 +203,7 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_MODE: '{{.METRICS_MODE | default "disabled"}}' + METRICS_MODE: '{{.METRICS_MODE | default ""}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ @@ -228,7 +228,7 @@ tasks: END_BLOCK: '{{.END_BLOCK | default "250000"}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_MODE: '{{.METRICS_MODE | default "disabled"}}' + METRICS_MODE: '{{.METRICS_MODE | default ""}}' cmds: - task: import-cchain-reexecute-range vars: From ca0b99391edf567577ebd9b057d64c255a06734f Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 12:42:25 -0400 Subject: [PATCH 04/26] docs: benchmark script --- scripts/benchmark_cchain_range.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 9765eedbb610..29423f8c1777 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -10,6 +10,7 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). 
# LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. +# METRICS_MODE (optional): If set, determines the metrics mode (disabled, server-only, or full). : "${BLOCK_DIR:?BLOCK_DIR must be set}" : "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" From a7cb056696109f2f60ac980ef4341b2faba9188c Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 14:05:29 -0400 Subject: [PATCH 05/26] chore: simplify metricsMode --- tests/reexecute/c/vm_reexecute_test.go | 53 +++++++++----------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index e1c9b4403f58..b8fab14e7376 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -49,9 +49,9 @@ import ( ) const ( - MetricsDisabled metricsMode = iota - MetricsServerOnly - MetricsFull + MetricsDisabled = "disabled" + MetricsServerOnly = "server-only" + MetricsFull = "full" ) var ( @@ -70,8 +70,7 @@ var ( chanSizeArg int executionTimeout time.Duration labelsArg string - - metricsModeArg = MetricsDisabled + metricsModeArg string networkUUID string = uuid.NewString() labels = map[string]string{ @@ -101,38 +100,13 @@ var ( configBytesArg []byte ) -type metricsMode int - -func (m *metricsMode) Set(s string) error { - s = strings.ToLower(strings.TrimSpace(s)) - - switch s { - case "disabled": - *m = MetricsDisabled - case "server-only": - *m = MetricsServerOnly - case "full": - *m = MetricsFull - default: - return fmt.Errorf("invalid metrics mode: %s (valid options: disabled, server-only, full)", s) - } - return nil -} +type metricsMode string -func (m metricsMode) String() string { - switch m { - case MetricsDisabled: - return "disabled" - case MetricsServerOnly: - return "server-only" - case MetricsFull: - return "full" - default: - return "unknown" - } +func (m metricsMode) 
isValid() bool { + return m == MetricsDisabled || m == MetricsServerOnly || m == MetricsFull } -func (m metricsMode) shouldStartServer() bool { return m >= MetricsServerOnly } +func (m metricsMode) shouldStartServer() bool { return m == MetricsServerOnly || m == MetricsFull } func (m metricsMode) shouldStartCollector() bool { return m == MetricsFull } @@ -146,7 +120,8 @@ func TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.") - flag.Var(&metricsModeArg, "metrics-mode", "Metrics mode: disabled (no metrics), server-only (creates Prometheus server), or full (creates Prometheus server and starts Prometheus collector)") + metricsModes := strings.Join([]string{MetricsDisabled, MetricsServerOnly, MetricsFull}, ", ") + flag.StringVar(&metricsModeArg, "metrics-mode", MetricsDisabled, fmt.Sprintf("Specifies the type of metrics configuration. Options include %s.", metricsModes)) flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) @@ -160,6 +135,12 @@ func TestMain(m *testing.M) { flag.Parse() + mode := metricsMode(metricsModeArg) + if !mode.isValid() { + fmt.Fprintf(os.Stderr, "invalid metrics mode %q. Valid options include %s. 
\n", metricsModeArg, metricsModes) + os.Exit(1) + } + customLabels, err := parseCustomLabels(labelsArg) if err != nil { fmt.Fprintf(os.Stderr, "failed to parse labels: %v\n", err) @@ -193,7 +174,7 @@ func BenchmarkReexecuteRange(b *testing.B) { startBlockArg, endBlockArg, chanSizeArg, - metricsModeArg, + metricsMode(metricsModeArg), ) }) } From c7f3185de4552ee54625443073ff4d55da48483f Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 19:23:58 -0400 Subject: [PATCH 06/26] chore: unexport metricsMode --- tests/reexecute/c/vm_reexecute_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index d25b3e3708ff..a098e55b845d 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -48,9 +48,9 @@ import ( ) const ( - MetricsDisabled = "disabled" - MetricsServerOnly = "server-only" - MetricsFull = "full" + metricsDisabled = "disabled" + metricsServerOnly = "server-only" + metricsFull = "full" ) var ( @@ -102,12 +102,12 @@ var ( type metricsMode string func (m metricsMode) isValid() bool { - return m == MetricsDisabled || m == MetricsServerOnly || m == MetricsFull + return m == metricsDisabled || m == metricsServerOnly || m == metricsFull } -func (m metricsMode) shouldStartServer() bool { return m == MetricsServerOnly || m == MetricsFull } +func (m metricsMode) shouldStartServer() bool { return m == metricsServerOnly || m == metricsFull } -func (m metricsMode) shouldStartCollector() bool { return m == MetricsFull } +func (m metricsMode) shouldStartCollector() bool { return m == metricsFull } func TestMain(m *testing.M) { evm.RegisterAllLibEVMExtras() @@ -119,8 +119,8 @@ func TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. 
After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.") - metricsModes := strings.Join([]string{MetricsDisabled, MetricsServerOnly, MetricsFull}, ", ") - flag.StringVar(&metricsModeArg, "metrics-mode", MetricsDisabled, fmt.Sprintf("Specifies the type of metrics configuration. Options include %s.", metricsModes)) + metricsModes := strings.Join([]string{metricsDisabled, metricsServerOnly, metricsFull}, ", ") + flag.StringVar(&metricsModeArg, "metrics-mode", metricsDisabled, fmt.Sprintf("Specifies the type of metrics configuration. Options include %s.", metricsModes)) flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) From 9e319d0ad4ca14d829023586fcb4adb1f42a63c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 13 Oct 2025 19:52:21 -0400 Subject: [PATCH 07/26] chore: clean up --- tests/reexecute/c/vm_reexecute_test.go | 49 ++++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index a098e55b845d..9f64a2620449 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -105,10 +105,6 @@ func (m metricsMode) isValid() bool { return m == metricsDisabled || m == metricsServerOnly || m == metricsFull } -func (m metricsMode) shouldStartServer() bool { return m == metricsServerOnly || m == metricsFull } - -func (m metricsMode) shouldStartCollector() bool { return m == metricsFull } - func TestMain(m *testing.M) { evm.RegisterAllLibEVMExtras() @@ -204,8 +200,13 @@ func benchmarkReexecuteRange( r.NoError(prefixGatherer.Register("avalanche_snowman", consensusRegistry)) log := tests.NewDefaultLogger("c-chain-reexecution") - if metricsMode.shouldStartServer() { - collectRegistry(b, log, 
"c-chain-reexecution", prefixGatherer, labels, metricsMode.shouldStartCollector()) + + switch metricsMode { + case metricsServerOnly: + startServer(b, log, prefixGatherer) + case metricsFull: + collectRegistry(b, log, "c-chain-reexecution", prefixGatherer, labels) + case metricsDisabled: } var ( @@ -568,33 +569,32 @@ func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, err return m, nil } -// collectRegistry starts a Prometheus server for the provided gatherer. If -// startCollector is true, it also starts a Prometheus collector configured to -// scrape the Prometheus server and attaches the provided labels + GitHub -// labels if available to the collected metrics. -func collectRegistry( +// startServer starts a Prometheus server for the provided gatherer. +func startServer( tb testing.TB, log logging.Logger, - name string, gatherer prometheus.Gatherer, - labels map[string]string, - startCollector bool, ) { r := require.New(tb) server, err := tests.NewPrometheusServer(gatherer) r.NoError(err) - if !startCollector { - log.Info("metrics endpoint available", - zap.String("url", fmt.Sprintf("http://%s/ext/metrics", server.Address())), - ) + log.Info("metrics endpoint available", + zap.String("url", fmt.Sprintf("http://%s/ext/metrics", server.Address())), + ) - tb.Cleanup(func() { - r.NoError(server.Stop()) - }) - return - } + tb.Cleanup(func() { + r.NoError(server.Stop()) + }) +} + +// collectRegistry starts a Prometheus server for the provided gatherer and +// starts a Prometheus collector configured to scrape the Prometheus server. +// collectRegistry also attaches the provided labels + Github labels if +// available to the collected metrics. 
+func collectRegistry(tb testing.TB, log logging.Logger, name string, gatherer prometheus.Gatherer, labels map[string]string) { + r := require.New(tb) startPromCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout) defer cancel() @@ -602,6 +602,9 @@ func collectRegistry( logger := tests.NewDefaultLogger("prometheus") r.NoError(tmpnet.StartPrometheus(startPromCtx, logger)) + server, err := tests.NewPrometheusServer(gatherer) + r.NoError(err) + var sdConfigFilePath string tb.Cleanup(func() { // Ensure a final metrics scrape. From 2cc0a790f6f3a37ca369a78622aeabdc0b4d6310 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 14 Oct 2025 08:16:28 -0400 Subject: [PATCH 08/26] chore: self-review nits --- tests/reexecute/c/vm_reexecute_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 9f64a2620449..ac0f74a53f6c 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -205,7 +205,7 @@ func benchmarkReexecuteRange( case metricsServerOnly: startServer(b, log, prefixGatherer) case metricsFull: - collectRegistry(b, log, "c-chain-reexecution", prefixGatherer, labels) + startServerAndCollector(b, log, "c-chain-reexecution", prefixGatherer, labels) case metricsDisabled: } @@ -589,11 +589,11 @@ func startServer( }) } -// collectRegistry starts a Prometheus server for the provided gatherer and +// startServerAndCollector starts a Prometheus server for the provided gatherer and // starts a Prometheus collector configured to scrape the Prometheus server. -// collectRegistry also attaches the provided labels + Github labels if +// startServerAndCollector also attaches the provided labels + Github labels if // available to the collected metrics. 
-func collectRegistry(tb testing.TB, log logging.Logger, name string, gatherer prometheus.Gatherer, labels map[string]string) { +func startServerAndCollector(tb testing.TB, log logging.Logger, name string, gatherer prometheus.Gatherer, labels map[string]string) { r := require.New(tb) startPromCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout) From feb16819cd832dc9e289b8b059178496ad2d28d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 14 Oct 2025 09:17:48 -0400 Subject: [PATCH 09/26] feat(reexecute/c): explicit metrics port --- Taskfile.yml | 4 ++++ scripts/benchmark_cchain_range.sh | 4 +++- tests/load/main/main.go | 2 +- tests/prometheus_server.go | 21 +++++++++++++-------- tests/reexecute/c/vm_reexecute_test.go | 22 +++++++++++++++++----- 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 4b38da7f6df7..58a1f8967c4f 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -204,6 +204,7 @@ tasks: LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' METRICS_MODE: '{{.METRICS_MODE | default ""}}' + METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ @@ -214,6 +215,7 @@ tasks: LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ METRICS_MODE={{.METRICS_MODE}} \ + METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ bash -x ./scripts/benchmark_cchain_range.sh reexecute-cchain-range-with-copied-data: @@ -229,6 +231,7 @@ tasks: LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' METRICS_MODE: '{{.METRICS_MODE | default ""}}' + METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' cmds: - task: import-cchain-reexecute-range vars: @@ -246,6 +249,7 @@ tasks: LABELS: '{{.LABELS}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' METRICS_MODE: '{{.METRICS_MODE}}' + METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' 
test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 29423f8c1777..f72feb96bc82 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -11,6 +11,7 @@ set -euo pipefail # LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. # METRICS_MODE (optional): If set, determines the metrics mode (disabled, server-only, or full). +# METRICS_SERVER_PORT (optional): If set, determines the port the metrics server will listen to. : "${BLOCK_DIR:?BLOCK_DIR must be set}" : "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" @@ -26,7 +27,8 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ ${LABELS:+--labels=\"${LABELS}\"} \ - ${METRICS_MODE:+--metrics-mode=\"${METRICS_MODE}\"}" + ${METRICS_MODE:+--metrics-mode=\"${METRICS_MODE}\"} \ + ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"}" if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}" diff --git a/tests/load/main/main.go b/tests/load/main/main.go index a2ad0c51c3a9..7a7a4d1052a9 100644 --- a/tests/load/main/main.go +++ b/tests/load/main/main.go @@ -102,7 +102,7 @@ func main() { require.NoError(err) registry := prometheus.NewRegistry() - metricsServer, err := tests.NewPrometheusServer(registry) + metricsServer, err := tests.NewPrometheusServer(registry, tests.DefaultMetricsPort) require.NoError(err) tc.DeferCleanup(func() { require.NoError(metricsServer.Stop()) diff --git a/tests/prometheus_server.go b/tests/prometheus_server.go index c87a61e7fc98..99fe18d1cf6c 100644 --- a/tests/prometheus_server.go +++ b/tests/prometheus_server.go @@ -6,6 +6,7 @@ package tests import ( "context" "errors" + "fmt" "net" "net/http" "time" @@ -14,7 
+15,10 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" ) -const defaultPrometheusListenAddr = "127.0.0.1:0" +const ( + localhostAddr = "127.0.0.1" + DefaultMetricsPort = 0 +) // PrometheusServer is a HTTP server that serves Prometheus metrics from the provided // gahterer. @@ -26,27 +30,28 @@ type PrometheusServer struct { } // NewPrometheusServer creates and starts a Prometheus server with the provided gatherer -// listening on 127.0.0.1:0 and serving /ext/metrics. -func NewPrometheusServer(gatherer prometheus.Gatherer) (*PrometheusServer, error) { +// listening on 127.0.0.1:port and serving /ext/metrics. +func NewPrometheusServer(gatherer prometheus.Gatherer, port uint64) (*PrometheusServer, error) { server := &PrometheusServer{ gatherer: gatherer, } - if err := server.start(); err != nil { + prometheusListenAddr := fmt.Sprintf("%s:%d", localhostAddr, port) + if err := server.start(prometheusListenAddr); err != nil { return nil, err } return server, nil } -// start the Prometheus server on a dynamic port. -func (s *PrometheusServer) start() error { +// start the Prometheus server on prometheusListenAddr. 
+func (s *PrometheusServer) start(prometheusListenAddr string) error { mux := http.NewServeMux() mux.Handle("/ext/metrics", promhttp.HandlerFor(s.gatherer, promhttp.HandlerOpts{})) - listener, err := net.Listen("tcp", defaultPrometheusListenAddr) + listener, err := net.Listen("tcp", prometheusListenAddr) if err != nil { - return err + return fmt.Errorf("failed to listen on %s: %w", prometheusListenAddr, err) } s.server = http.Server{ diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index ac0f74a53f6c..d942095b4ca4 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -70,6 +70,7 @@ var ( executionTimeout time.Duration labelsArg string metricsModeArg string + metricsServerPort uint64 networkUUID string = uuid.NewString() labels = map[string]string{ @@ -117,6 +118,7 @@ func TestMain(m *testing.M) { metricsModes := strings.Join([]string{metricsDisabled, metricsServerOnly, metricsFull}, ", ") flag.StringVar(&metricsModeArg, "metrics-mode", metricsDisabled, fmt.Sprintf("Specifies the type of metrics configuration. Options include %s.", metricsModes)) + flag.Uint64Var(&metricsServerPort, "metrics-server-port", tests.DefaultMetricsPort, "Port which metrics server will listen to (metrics mode must be either server-only or full)") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. 
\"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) @@ -170,6 +172,7 @@ func BenchmarkReexecuteRange(b *testing.B) { endBlockArg, chanSizeArg, metricsMode(metricsModeArg), + metricsServerPort, ) }) } @@ -183,6 +186,7 @@ func benchmarkReexecuteRange( endBlock uint64, chanSize int, metricsMode metricsMode, + metricsServerPort uint64, ) { r := require.New(b) ctx := context.Background() @@ -203,9 +207,9 @@ func benchmarkReexecuteRange( switch metricsMode { case metricsServerOnly: - startServer(b, log, prefixGatherer) + startServer(b, log, prefixGatherer, metricsServerPort) case metricsFull: - startServerAndCollector(b, log, "c-chain-reexecution", prefixGatherer, labels) + startServerAndCollector(b, log, "c-chain-reexecution", prefixGatherer, labels, metricsServerPort) case metricsDisabled: } @@ -574,10 +578,11 @@ func startServer( tb testing.TB, log logging.Logger, gatherer prometheus.Gatherer, + port uint64, ) { r := require.New(tb) - server, err := tests.NewPrometheusServer(gatherer) + server, err := tests.NewPrometheusServer(gatherer, port) r.NoError(err) log.Info("metrics endpoint available", @@ -593,7 +598,14 @@ func startServer( // starts a Prometheus collector configured to scrape the Prometheus server. // startServerAndCollector also attaches the provided labels + Github labels if // available to the collected metrics. 
-func startServerAndCollector(tb testing.TB, log logging.Logger, name string, gatherer prometheus.Gatherer, labels map[string]string) { +func startServerAndCollector( + tb testing.TB, + log logging.Logger, + name string, + gatherer prometheus.Gatherer, + labels map[string]string, + port uint64, +) { r := require.New(tb) startPromCtx, cancel := context.WithTimeout(context.Background(), tests.DefaultTimeout) @@ -602,7 +614,7 @@ func startServerAndCollector(tb testing.TB, log logging.Logger, name string, gat logger := tests.NewDefaultLogger("prometheus") r.NoError(tmpnet.StartPrometheus(startPromCtx, logger)) - server, err := tests.NewPrometheusServer(gatherer) + server, err := tests.NewPrometheusServer(gatherer, port) r.NoError(err) var sdConfigFilePath string From a5d4392ef450b6b45fecedbb284d7347da8a92c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 10:33:28 -0400 Subject: [PATCH 10/26] chore: nits --- tests/reexecute/c/vm_reexecute_test.go | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 4ac0fdeddc8d..3e3970d3708b 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -46,12 +46,6 @@ import ( "github.com/ava-labs/avalanchego/vms/platformvm/warp" ) -const ( - metricsDisabled = "disabled" - metricsServerOnly = "server-only" - metricsFull = "full" -) - var ( mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") @@ -101,12 +95,6 @@ var ( configBytesArg []byte ) -type metricsMode string - -func (m metricsMode) isValid() bool { - return m == metricsDisabled || m == metricsServerOnly || m == metricsFull -} - func TestMain(m *testing.M) { evm.RegisterAllLibEVMExtras() @@ -173,6 +161,7 @@ func BenchmarkReexecuteRange(b *testing.B) { chanSizeArg, 
metricsServerEnabledArg, metricsCollectorEnabledArg, + metricsServerPort, ) }) } @@ -187,6 +176,7 @@ func benchmarkReexecuteRange( chanSize int, metricsServerEnabled bool, metricsCollectorEnabled bool, + metricsPort uint64, ) { r := require.New(b) ctx := context.Background() @@ -206,7 +196,7 @@ func benchmarkReexecuteRange( log := tests.NewDefaultLogger("c-chain-reexecution") if metricsServerEnabled { - serverAddr := startServer(b, log, prefixGatherer) + serverAddr := startServer(b, log, prefixGatherer, metricsPort) if metricsCollectorEnabled { startCollector(b, log, "c-chain-reexecution", labels, serverAddr) @@ -579,10 +569,11 @@ func startServer( tb testing.TB, log logging.Logger, gatherer prometheus.Gatherer, + port uint64, ) string { r := require.New(tb) - server, err := tests.NewPrometheusServer(gatherer) + server, err := tests.NewPrometheusServer(gatherer, port) r.NoError(err) log.Info("metrics endpoint available", From c67da7fceb44bc3fa509be0683ef393ee63435c3 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 14:31:22 -0400 Subject: [PATCH 11/26] chore: nits --- tests/prometheus_server.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/prometheus_server.go b/tests/prometheus_server.go index 99fe18d1cf6c..b5c52af947dc 100644 --- a/tests/prometheus_server.go +++ b/tests/prometheus_server.go @@ -16,8 +16,9 @@ import ( ) const ( - localhostAddr = "127.0.0.1" DefaultMetricsPort = 0 + + localhostAddr = "127.0.0.1" ) // PrometheusServer is a HTTP server that serves Prometheus metrics from the provided @@ -36,22 +37,22 @@ func NewPrometheusServer(gatherer prometheus.Gatherer, port uint64) (*Prometheus gatherer: gatherer, } - prometheusListenAddr := fmt.Sprintf("%s:%d", localhostAddr, port) - if err := server.start(prometheusListenAddr); err != nil { + serverAddress := fmt.Sprintf("%s:%d", localhostAddr, port) + if err := server.start(serverAddress); err != nil { return nil, err } return server, nil } -// 
start the Prometheus server on prometheusListenAddr. -func (s *PrometheusServer) start(prometheusListenAddr string) error { +// start the Prometheus server on address. +func (s *PrometheusServer) start(address string) error { mux := http.NewServeMux() mux.Handle("/ext/metrics", promhttp.HandlerFor(s.gatherer, promhttp.HandlerOpts{})) - listener, err := net.Listen("tcp", prometheusListenAddr) + listener, err := net.Listen("tcp", address) if err != nil { - return fmt.Errorf("failed to listen on %s: %w", prometheusListenAddr, err) + return fmt.Errorf("failed to listen on %s: %w", address, err) } s.server = http.Server{ From f5e2e14b8e3f93665f6e5c8370095f3ca6fd0bd6 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 14:40:59 -0400 Subject: [PATCH 12/26] chore: unexport consts --- tests/load/main/main.go | 2 +- tests/prometheus_server.go | 13 +++++++++---- tests/reexecute/c/vm_reexecute_test.go | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/load/main/main.go b/tests/load/main/main.go index 7a7a4d1052a9..a2ad0c51c3a9 100644 --- a/tests/load/main/main.go +++ b/tests/load/main/main.go @@ -102,7 +102,7 @@ func main() { require.NoError(err) registry := prometheus.NewRegistry() - metricsServer, err := tests.NewPrometheusServer(registry, tests.DefaultMetricsPort) + metricsServer, err := tests.NewPrometheusServer(registry) require.NoError(err) tc.DeferCleanup(func() { require.NoError(metricsServer.Stop()) diff --git a/tests/prometheus_server.go b/tests/prometheus_server.go index b5c52af947dc..3b48226f0ac7 100644 --- a/tests/prometheus_server.go +++ b/tests/prometheus_server.go @@ -16,9 +16,8 @@ import ( ) const ( - DefaultMetricsPort = 0 - - localhostAddr = "127.0.0.1" + localhostAddr = "127.0.0.1" + defaultMetricsPort = 0 ) // PrometheusServer is a HTTP server that serves Prometheus metrics from the provided @@ -30,9 +29,15 @@ type PrometheusServer struct { errChan chan error } +// NewPrometheusServer creates and starts a 
Prometheus server with the provided gatherer +// listening on 127.0.0.1:0 and serving /ext/metrics. +func NewPrometheusServer(gatherer prometheus.Gatherer) (*PrometheusServer, error) { + return NewPrometheusServerWithPort(gatherer, defaultMetricsPort) +} + // NewPrometheusServer creates and starts a Prometheus server with the provided gatherer // listening on 127.0.0.1:port and serving /ext/metrics. -func NewPrometheusServer(gatherer prometheus.Gatherer, port uint64) (*PrometheusServer, error) { +func NewPrometheusServerWithPort(gatherer prometheus.Gatherer, port uint64) (*PrometheusServer, error) { server := &PrometheusServer{ gatherer: gatherer, } diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 3e3970d3708b..4f8ad312833f 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -107,7 +107,7 @@ func TestMain(m *testing.M) { flag.BoolVar(&metricsServerEnabledArg, "metrics-server-enabled", false, "Whether to enable the metrics server.") flag.BoolVar(&metricsCollectorEnabledArg, "metrics-collector-enabled", false, "Whether to enable the metrics collector (if true, then metrics-server-enabled must be true as well).") - flag.Uint64Var(&metricsServerPort, "metrics-server-port", tests.DefaultMetricsPort, "Port which metrics server will listen to (metrics mode must be either server-only or full)") + flag.Uint64Var(&metricsServerPort, "metrics-server-port", metricsServerPort, "Port which metrics server will listen to (metrics mode must be either server-only or full)") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. 
\"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) @@ -573,7 +573,7 @@ func startServer( ) string { r := require.New(tb) - server, err := tests.NewPrometheusServer(gatherer, port) + server, err := tests.NewPrometheusServerWithPort(gatherer, port) r.NoError(err) log.Info("metrics endpoint available", From d871c75ef4c1c301fa3518f0db213fc5624c66b4 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 14:44:55 -0400 Subject: [PATCH 13/26] chore: README --- tests/reexecute/c/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index 07b01d4d8117..a020165234eb 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -46,6 +46,7 @@ If running locally, metrics collection can be customized via the following param - `METRICS_SERVER_ENABLED`: starts a Prometheus server exporting VM metrics. - `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector (if enabled, then `METRICS_SERVER_ENABLED` must be enabled as well). +- `METRICS_SERVER_PORT`: determines the port the metrics server will listen to (set to `0` by default) When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. 
From 540d60f17553ecbc6a56c0d753c6d88894ee637d Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 14:48:04 -0400 Subject: [PATCH 14/26] chore: nit --- tests/reexecute/c/vm_reexecute_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 4f8ad312833f..452ecf95bd72 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -107,7 +107,7 @@ func TestMain(m *testing.M) { flag.BoolVar(&metricsServerEnabledArg, "metrics-server-enabled", false, "Whether to enable the metrics server.") flag.BoolVar(&metricsCollectorEnabledArg, "metrics-collector-enabled", false, "Whether to enable the metrics collector (if true, then metrics-server-enabled must be true as well).") - flag.Uint64Var(&metricsServerPort, "metrics-server-port", metricsServerPort, "Port which metrics server will listen to (metrics mode must be either server-only or full)") + flag.Uint64Var(&metricsServerPort, "metrics-server-port", metricsServerPort, "Port which the metrics server will listen to") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. 
\"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) From bca6d48cc62c47dc9945919869d61ad71202a59b Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Mon, 20 Oct 2025 15:41:12 -0400 Subject: [PATCH 15/26] chore: nit --- Taskfile.yml | 4 ++-- scripts/benchmark_cchain_range.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 3e60ae2b8856..f669240dcd4c 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -217,7 +217,7 @@ tasks: BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \ METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ - METRICS_SERVER_PORT={{.METRICS_COLLECTOR_SERVER_PORT}} \ + METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ bash -x ./scripts/benchmark_cchain_range.sh reexecute-cchain-range-with-copied-data: @@ -253,7 +253,7 @@ tasks: BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}' METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' - METRICS_SERVER_PORT: '{{.METRICS_COLLECTOR_SERVER_PORT}}' + METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 35c0763a2fbb..481197216dff 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -30,7 +30,7 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ ${LABELS:+--labels=\"${LABELS}\"} \ ${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \ ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"} \ - ${METRICS_SERVER_PORT:+--metrics-collector-server-port=\"${METRICS_COLLECTOR_SERVER_PORT}\"}" + ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"}" if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then eval "$cmd" | tee 
"${BENCHMARK_OUTPUT_FILE}" From ac02cbf380a72f4752977dc464a940798a0e2735 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 08:15:45 -0400 Subject: [PATCH 16/26] docs: func name --- tests/prometheus_server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/prometheus_server.go b/tests/prometheus_server.go index 3b48226f0ac7..07b02c4f4c37 100644 --- a/tests/prometheus_server.go +++ b/tests/prometheus_server.go @@ -35,7 +35,7 @@ func NewPrometheusServer(gatherer prometheus.Gatherer) (*PrometheusServer, error return NewPrometheusServerWithPort(gatherer, defaultMetricsPort) } -// NewPrometheusServer creates and starts a Prometheus server with the provided gatherer +// NewPrometheusServerWithPort creates and starts a Prometheus server with the provided gatherer // listening on 127.0.0.1:port and serving /ext/metrics. func NewPrometheusServerWithPort(gatherer prometheus.Gatherer, port uint64) (*PrometheusServer, error) { server := &PrometheusServer{ From 46542061fe98287717ea358d33ea3ef0ea7ae17e Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:26:27 -0400 Subject: [PATCH 17/26] chore: unify server flags --- .../c-chain-reexecution-benchmark/action.yml | 2 +- Taskfile.yml | 11 +++----- scripts/benchmark_cchain_range.sh | 6 ++--- tests/reexecute/c/vm_reexecute_test.go | 25 +++++++++++-------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index 3385ff50a0c1..d1658d62d117 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -95,7 +95,7 @@ runs: LABELS=${{ env.LABELS }} \ BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_SERVER_ENABLED=true \ + METRICS_SERVER_PORT=0 \ METRICS_COLLECTOR_ENABLED=true prometheus_url: ${{ inputs.prometheus-url }} 
prometheus_push_url: ${{ inputs.prometheus-push-url }} diff --git a/Taskfile.yml b/Taskfile.yml index 1f5acb199cd2..8713a4024a9b 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -207,9 +207,8 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' + METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ @@ -219,9 +218,9 @@ tasks: END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ - METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \ METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ + METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ bash -x ./scripts/benchmark_cchain_range.sh reexecute-cchain-range-with-copied-data: @@ -236,9 +235,8 @@ tasks: END_BLOCK: '{{.END_BLOCK | default "250000"}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' + METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' cmds: - task: import-cchain-reexecute-range vars: @@ -255,9 +253,8 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' + METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' 
test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 481197216dff..9038516a68d3 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -10,7 +10,6 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). # LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. -# METRICS_SERVER_ENABLED (optional): If set, enables the metrics server. # METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector. # METRICS_SERVER_PORT (optional): If set, determines the port the metrics server will listen to. @@ -28,9 +27,8 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ ${LABELS:+--labels=\"${LABELS}\"} \ - ${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \ - ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"} \ - ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"}" + ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"}" \ + ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"} if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}" diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 02bc153631c6..beb941fea2b2 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -63,9 +63,8 @@ var ( executionTimeout time.Duration labelsArg string - metricsServerEnabledArg bool + metricsServerPort *uint64 metricsCollectorEnabledArg bool - metricsServerPort uint64 networkUUID string = uuid.NewString() labels = map[string]string{ @@ -105,9 +104,17 @@ func 
TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.") - flag.BoolVar(&metricsServerEnabledArg, "metrics-server-enabled", false, "Whether to enable the metrics server.") + flag.Func("metrics-server-port", "Port which the metrics server will listen to", func(s string) error { + port, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return err + } + + metricsServerPort = new(uint64) + *metricsServerPort = port + return nil + }) flag.BoolVar(&metricsCollectorEnabledArg, "metrics-collector-enabled", false, "Whether to enable the metrics collector (if true, then metrics-server-enabled must be true as well).") - flag.Uint64Var(&metricsServerPort, "metrics-server-port", metricsServerPort, "Port which the metrics server will listen to") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. 
\"owner=tim,runner=snoopy\"") predefinedConfigKeys := slices.Collect(maps.Keys(predefinedConfigs)) @@ -121,7 +128,7 @@ func TestMain(m *testing.M) { flag.Parse() - if metricsCollectorEnabledArg && !metricsServerEnabledArg { + if metricsCollectorEnabledArg && metricsServerPort == nil { fmt.Fprint(os.Stderr, "metrics collector is enabled but metrics server is disabled.\n") os.Exit(1) } @@ -159,7 +166,6 @@ func BenchmarkReexecuteRange(b *testing.B) { startBlockArg, endBlockArg, chanSizeArg, - metricsServerEnabledArg, metricsCollectorEnabledArg, metricsServerPort, ) @@ -174,9 +180,8 @@ func benchmarkReexecuteRange( startBlock uint64, endBlock uint64, chanSize int, - metricsServerEnabled bool, metricsCollectorEnabled bool, - metricsPort uint64, + metricsPort *uint64, ) { r := require.New(b) ctx := b.Context() @@ -195,8 +200,8 @@ func benchmarkReexecuteRange( log := tests.NewDefaultLogger("c-chain-reexecution") - if metricsServerEnabled { - serverAddr := startServer(b, log, prefixGatherer, metricsPort) + if metricsPort != nil { + serverAddr := startServer(b, log, prefixGatherer, *metricsPort) if metricsCollectorEnabled { startCollector(b, log, "c-chain-reexecution", labels, serverAddr) From d18bbb8c7f905917b865cf2cb75b96d40ce267c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:28:31 -0400 Subject: [PATCH 18/26] chore: nit --- scripts/benchmark_cchain_range.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 9038516a68d3..b36d54e5ffa0 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -10,8 +10,8 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). # LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. -# METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector. 
# METRICS_SERVER_PORT (optional): If set, determines the port the metrics server will listen to. +# METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector. : "${BLOCK_DIR:?BLOCK_DIR must be set}" : "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" From df721f1f21145d1e0ad2bdd7e306cf002fd736ac Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:38:24 -0400 Subject: [PATCH 19/26] fix: quote --- scripts/benchmark_cchain_range.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index b36d54e5ffa0..b2ed5e7b015b 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -27,8 +27,8 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ ${LABELS:+--labels=\"${LABELS}\"} \ - ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"}" \ - ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"} + ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"} \ + ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"}" if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}" From 527b652f0ec6d1ec2e23809c013c576a1c6d62f9 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:39:20 -0400 Subject: [PATCH 20/26] feat: implicitly enable metrics server if collector is enabled --- tests/reexecute/c/vm_reexecute_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index beb941fea2b2..e3a421dcc4c2 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -129,8 +129,7 @@ func TestMain(m *testing.M) { flag.Parse() if metricsCollectorEnabledArg && 
metricsServerPort == nil { - fmt.Fprint(os.Stderr, "metrics collector is enabled but metrics server is disabled.\n") - os.Exit(1) + metricsServerPort = new(uint64) } customLabels, err := parseCustomLabels(labelsArg) From f2fc7948393d0cce17aa105b0acfceb335ad3dab Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:43:21 -0400 Subject: [PATCH 21/26] chore: nits --- .github/actions/c-chain-reexecution-benchmark/action.yml | 1 - Taskfile.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index d1658d62d117..27149323332c 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -95,7 +95,6 @@ runs: LABELS=${{ env.LABELS }} \ BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_SERVER_PORT=0 \ METRICS_COLLECTOR_ENABLED=true prometheus_url: ${{ inputs.prometheus-url }} prometheus_push_url: ${{ inputs.prometheus-push-url }} diff --git a/Taskfile.yml b/Taskfile.yml index 8713a4024a9b..9abe50402d75 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -218,7 +218,6 @@ tasks: END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ - METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ bash -x ./scripts/benchmark_cchain_range.sh From f156a76da198df390d41e51dc9ce8fdecc230387 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:50:35 -0400 Subject: [PATCH 22/26] docs: README --- tests/reexecute/c/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index a020165234eb..d4cfdc00b49a 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ 
-44,13 +44,13 @@ export AWS_REGION=us-east-2 If running locally, metrics collection can be customized via the following parameters: -- `METRICS_SERVER_ENABLED`: starts a Prometheus server exporting VM metrics. -- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector (if enabled, then `METRICS_SERVER_ENABLED` must be enabled as well). -- `METRICS_SERVER_PORT`: determines the port the metrics server will listen to (set to `0` by default) + +- `METRICS_SERVER_PORT`: if set, starts a metrics server and sets the port the server will listen to. +- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_PORT` is not set, enabling the collector implicitly sets `METRICS_SERVER_PORT` to `0`. When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. -Running the re-execution test in CI will always set `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true`. +Running the re-execution test in CI will always set `METRICS_COLLECTOR_ENABLED=true`. ## Quick Start @@ -238,7 +238,7 @@ The `CONFIG` parameter currently only supports pre-defined configs and not passi The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/. -To export metrics for a local run, simply set the Taskfile variables `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true` either via environment variable or passing it at the command line. +To export metrics for a local run, simply set the Taskfile variable `METRICS_COLLECTOR_ENABLED=true` either via environment variable or passing it at the command line. 
You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s). From 5bb04be7d5a760569a28547e856c7d348c56eb97 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 14:51:20 -0400 Subject: [PATCH 23/26] chore: nit --- tests/reexecute/c/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index d4cfdc00b49a..314d15a6c5f8 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -44,7 +44,6 @@ export AWS_REGION=us-east-2 If running locally, metrics collection can be customized via the following parameters: - - `METRICS_SERVER_PORT`: if set, starts a metrics server and sets the port the server will listen to. - `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_PORT` is not set, enabling the collector implicitly sets `METRICS_SERVER_PORT` to `0`. From d0b0083cc9492fa97843d39bff341540879f7b88 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 15:08:45 -0400 Subject: [PATCH 24/26] docs: flag usage --- tests/reexecute/c/vm_reexecute_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index e3a421dcc4c2..b7d6d7158c7b 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -104,7 +104,7 @@ func TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. 
If 0, no timeout is applied.") - flag.Func("metrics-server-port", "Port which the metrics server will listen to", func(s string) error { + flag.Func("metrics-server-port", "Starts a metrics server and sets the port it will listen to", func(s string) error { port, err := strconv.ParseUint(s, 10, 64) if err != nil { return err From 830e8c1303d4cd93bd61f3192b3a9ddc0dc63c2d Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Tue, 28 Oct 2025 15:12:00 -0400 Subject: [PATCH 25/26] chore: nits --- tests/reexecute/c/README.md | 2 +- tests/reexecute/c/vm_reexecute_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index 314d15a6c5f8..870e6bca12e8 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -44,7 +44,7 @@ export AWS_REGION=us-east-2 If running locally, metrics collection can be customized via the following parameters: -- `METRICS_SERVER_PORT`: if set, starts a metrics server and sets the port the server will listen to. +- `METRICS_SERVER_PORT`: if set, starts a Prometheus server exporting VM metrics and sets the port the server will listen to. - `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_PORT` is not set, enabling the collector implicitly sets `METRICS_SERVER_PORT` to `0`. When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. 
diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index b7d6d7158c7b..43cebd37301a 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -165,8 +165,8 @@ func BenchmarkReexecuteRange(b *testing.B) { startBlockArg, endBlockArg, chanSizeArg, - metricsCollectorEnabledArg, metricsServerPort, + metricsCollectorEnabledArg, ) }) } @@ -179,8 +179,8 @@ func benchmarkReexecuteRange( startBlock uint64, endBlock uint64, chanSize int, - metricsCollectorEnabled bool, metricsPort *uint64, + metricsCollectorEnabled bool, ) { r := require.New(b) ctx := b.Context() From 3651f47d23ab1bd1ffa627acea5493689bb43059 Mon Sep 17 00:00:00 2001 From: Rodrigo Villar Date: Wed, 29 Oct 2025 09:16:19 -0400 Subject: [PATCH 26/26] chore: revert unification of metrics server param --- .../c-chain-reexecution-benchmark/action.yml | 1 + Taskfile.yml | 4 +++ scripts/benchmark_cchain_range.sh | 2 ++ tests/reexecute/c/README.md | 9 +++--- tests/reexecute/c/vm_reexecute_test.go | 29 ++++++++----------- 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index 27149323332c..3385ff50a0c1 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -95,6 +95,7 @@ runs: LABELS=${{ env.LABELS }} \ BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ RUNNER_NAME=${{ inputs.runner_name }} \ + METRICS_SERVER_ENABLED=true \ METRICS_COLLECTOR_ENABLED=true prometheus_url: ${{ inputs.prometheus-url }} prometheus_push_url: ${{ inputs.prometheus-push-url }} diff --git a/Taskfile.yml b/Taskfile.yml index 9abe50402d75..fa229d51d012 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -207,6 +207,7 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' + 
METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' cmd: | @@ -218,6 +219,7 @@ tasks: END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ + METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \ METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ bash -x ./scripts/benchmark_cchain_range.sh @@ -234,6 +236,7 @@ tasks: END_BLOCK: '{{.END_BLOCK | default "250000"}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' + METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' cmds: @@ -252,6 +255,7 @@ tasks: END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' + METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}' METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index b2ed5e7b015b..0d9d951e7194 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -10,6 +10,7 @@ set -euo pipefail # END_BLOCK: The ending block height (inclusive). # LABELS (optional): Comma-separated key=value pairs for metric labels. # BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. +# METRICS_SERVER_ENABLED (optional): If set, enables the metrics server. # METRICS_SERVER_PORT (optional): If set, determines the port the metrics server will listen to. # METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector. 
@@ -27,6 +28,7 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ ${LABELS:+--labels=\"${LABELS}\"} \ + ${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \ ${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"} \ ${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"}" diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index 870e6bca12e8..03d918cc36a6 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -44,12 +44,13 @@ export AWS_REGION=us-east-2 If running locally, metrics collection can be customized via the following parameters: -- `METRICS_SERVER_PORT`: if set, starts a Prometheus server exporting VM metrics and sets the port the server will listen to. -- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_PORT` is not set, enabling the collector implicitly sets `METRICS_SERVER_PORT` to `0`. +- `METRICS_SERVER_ENABLED`: starts a Prometheus server exporting VM metrics. +- `METRICS_SERVER_PORT`: if set, determines the port the Prometheus server will listen to (set to `0` by default). +- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_ENABLED` is not set, enabling the collector implicitly sets `METRICS_SERVER_ENABLED` to `true`. When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. -Running the re-execution test in CI will always set `METRICS_COLLECTOR_ENABLED=true`. +Running the re-execution test in CI will always set `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true`. 
## Quick Start @@ -237,7 +238,7 @@ The `CONFIG` parameter currently only supports pre-defined configs and not passi The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/. To export metrics for a local run, simply set the Taskfile variables `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true` either via environment variable or by passing them at the command line. You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s). diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 43cebd37301a..124ad589e571 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -63,7 +63,8 @@ var ( executionTimeout time.Duration labelsArg string - metricsServerPort *uint64 + metricsServerEnabledArg bool + metricsServerPortArg uint64 metricsCollectorEnabledArg bool networkUUID string = uuid.NewString() @@ -104,16 +105,8 @@ func TestMain(m *testing.M) { flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. 
If 0, no timeout is applied.") - flag.Func("metrics-server-port", "Starts a metrics server and sets the port it will listen to", func(s string) error { - port, err := strconv.ParseUint(s, 10, 64) - if err != nil { - return err - } - - metricsServerPort = new(uint64) - *metricsServerPort = port - return nil - }) + flag.BoolVar(&metricsServerEnabledArg, "metrics-server-enabled", false, "Whether to enable the metrics server.") + flag.Uint64Var(&metricsServerPortArg, "metrics-server-port", 0, "The port the metrics server will listen to.") flag.BoolVar(&metricsCollectorEnabledArg, "metrics-collector-enabled", false, "Whether to enable the metrics collector (if true, then metrics-server-enabled must be true as well).") flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"") @@ -128,8 +121,8 @@ func TestMain(m *testing.M) { flag.Parse() - if metricsCollectorEnabledArg && metricsServerPort == nil { - metricsServerPort = new(uint64) + if metricsCollectorEnabledArg { + metricsServerEnabledArg = true } customLabels, err := parseCustomLabels(labelsArg) @@ -165,7 +158,8 @@ func BenchmarkReexecuteRange(b *testing.B) { startBlockArg, endBlockArg, chanSizeArg, - metricsServerPort, + metricsServerEnabledArg, + metricsServerPortArg, metricsCollectorEnabledArg, ) }) @@ -179,7 +173,8 @@ func benchmarkReexecuteRange( startBlock uint64, endBlock uint64, chanSize int, - metricsPort *uint64, + metricsServerEnabled bool, + metricsPort uint64, metricsCollectorEnabled bool, ) { r := require.New(b) @@ -199,8 +194,8 @@ func benchmarkReexecuteRange( log := tests.NewDefaultLogger("c-chain-reexecution") - if metricsPort != nil { - serverAddr := startServer(b, log, prefixGatherer, *metricsPort) + if metricsServerEnabled { + serverAddr := startServer(b, log, prefixGatherer, metricsPort) if metricsCollectorEnabled { startCollector(b, log, "c-chain-reexecution", labels, serverAddr)