Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ txtx-core = { version = "0.4.15" }
txtx-gql = { version = "0.3.9" }
txtx-supervisor-ui = { version = "0.2.10", default-features = false }

opentelemetry = { version = "0.28", default-features = false, features = ["metrics"] }
opentelemetry_sdk = { version = "0.28", default-features = false, features = ["rt-tokio", "metrics"] }
opentelemetry-otlp = { version = "0.28", default-features = false, features = ["metrics", "grpc-tonic"] }
opentelemetry-prometheus = { version = "0.28", default-features = false }
prometheus = { version = "0.13", default-features = false }
axum = { version = "0.8", default-features = false, features = ["tokio", "http1"] }

# [patch.crates-io]
## Local
# txtx-addon-kit = { path = "../txtx/crates/txtx-addon-kit" }
Expand Down
1 change: 1 addition & 0 deletions crates/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ postgres = ["surfpool-gql/postgres", "surfpool-core/postgres"]
version_check = []
subgraph = ["surfpool-core/subgraph"]
register-tracing = ["surfpool-core/register-tracing"]
prometheus = ["surfpool-core/prometheus"]

[target.'cfg(not(target_os = "windows"))'.dependencies]
fork = "0.2.0"
25 changes: 25 additions & 0 deletions crates/cli/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use solana_keypair::Keypair;
use solana_pubkey::Pubkey;
use solana_signer::{EncodableKey, Signer};
use surfpool_mcp::McpOptions;
#[cfg(feature = "prometheus")]
use surfpool_types::TelemetryConfig;
use surfpool_types::{
AccountSnapshot, BlockProductionMode, CHANGE_TO_DEFAULT_STUDIO_PORT_ONCE_SUPERVISOR_MERGED,
DEFAULT_DEVNET_RPC_URL, DEFAULT_GOSSIP_PORT, DEFAULT_MAINNET_RPC_URL, DEFAULT_NETWORK_HOST,
Expand Down Expand Up @@ -260,6 +262,20 @@ pub struct StartSimnet {
/// When multiple files are provided, later files override earlier ones for duplicate keys.
#[arg(long = "snapshot")]
pub snapshot: Vec<String>,
/// Enable Prometheus metrics endpoint
#[cfg(feature = "prometheus")]
/// Enable Prometheus metrics endpoint
#[arg(long = "metrics-enabled", env = "SURFPOOL_METRICS_ENABLED")]
pub metrics_enabled: bool,

#[cfg(feature = "prometheus")]
/// Prometheus metrics endpoint address
#[arg(
long = "metrics-addr",
default_value = "0.0.0.0:9000",
env = "SURFPOOL_METRICS_ADDR"
)]
pub metrics_addr: String,
/// Skip signature verification for all transactions (eg. surfpool start --skip-signature-verification)
#[clap(long = "skip-signature-verification", action=ArgAction::SetTrue, default_value = "false")]
pub skip_signature_verification: bool,
Expand Down Expand Up @@ -459,6 +475,15 @@ impl StartSimnet {
subgraph: self.subgraph_config(),
studio: self.studio_config(),
plugin_config_path,
#[cfg(feature = "prometheus")]
telemetry: self.telemetry_config(),
}
}
#[cfg(feature = "prometheus")]
pub fn telemetry_config(&self) -> TelemetryConfig {
TelemetryConfig {
enabled: self.metrics_enabled,
prometheus_addr: self.metrics_addr.clone(),
}
}
}
Expand Down
37 changes: 37 additions & 0 deletions crates/cli/src/cli/simnet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ pub async fn handle_start_local_surfnet_command(
let (mut surfnet_svm, simnet_events_rx, geyser_events_rx) =
SurfnetSvm::new_with_db(cmd.db.as_deref(), &cmd.surfnet_id)
.map_err(|e| format!("Failed to initialize Surfnet SVM: {}", e))?;
#[cfg(feature = "prometheus")]
{
let telemetry_config = cmd.telemetry_config();
if telemetry_config.enabled {
match surfpool_core::telemetry::init_from_config(
telemetry_config.enabled,
&telemetry_config.prometheus_addr,
) {
Err(e) => {
let _ = surfnet_svm
.simnet_events_tx
.send(SimnetEvent::warn(format!("Metrics init failed: {}", e)));
}
Ok(_) => {
let _ = surfnet_svm.simnet_events_tx.send(SimnetEvent::info(format!(
"Metrics available at http://{}/metrics",
telemetry_config.prometheus_addr
)));
}
}
}
}

// Apply feature configuration from CLI flags
let feature_config = cmd.feature_config();
Expand Down Expand Up @@ -443,6 +465,21 @@ fn log_events(
let _ = simnet_commands_tx
.send(SimnetCommand::CompleteRunbookExecution(runbook_id, errors));
}
#[cfg(feature = "prometheus")]
SimnetEvent::MetricsData(metrics_data) => {
surfpool_core::telemetry::metrics().record_svm_state(
metrics_data.slot,
metrics_data.epoch,
metrics_data.slot_index,
metrics_data.transactions_count,
metrics_data.transactions_processed,
metrics_data.start_time,
metrics_data.signature_subs,
metrics_data.account_subs,
metrics_data.slot_subs,
metrics_data.logs_subs,
);
}
},
Err(_e) => {
break;
Expand Down
15 changes: 15 additions & 0 deletions crates/cli/src/tui/simnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,21 @@ fn run_app<B: Backend>(terminal: &mut Terminal<B>, mut app: App) -> io::Result<(
);
app.status_bar_message = None;
}
#[cfg(feature = "prometheus")]
SimnetEvent::MetricsData(metrics_data) => {
{
// In TUI mode, we don't need to record metrics to Prometheus
// because the metrics server is running in its own thread
// But we might want to display something in debug mode
if app.include_debug_logs {
new_events.push((
EventType::Debug,
Local::now(),
format!("Metrics updated: slot={}", metrics_data.slot),
));
}
}
}
},
Err(_) => break,
},
Expand Down
8 changes: 7 additions & 1 deletion crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,12 @@ anchor-lang-idl = { workspace = true }
txtx-addon-kit = { workspace = true }
txtx-addon-network-svm-types = { workspace = true }
txtx-addon-network-svm = { workspace = true }

# Prometheus metrics - declare normally, control via feature
opentelemetry = { version = "0.28", default-features = false, features = ["metrics"], optional = true }
opentelemetry_sdk = { version = "0.28", default-features = false, features = ["rt-tokio", "metrics"], optional = true }
opentelemetry-prometheus = { version = "0.28", default-features = false, optional = true }
prometheus = { version = "0.13", default-features = false, optional = true }
axum = { version = "0.8", default-features = false, features = ["tokio", "http1"], optional = true }

[dev-dependencies]
test-case = { workspace = true }
Expand All @@ -116,3 +121,4 @@ ignore_tests_ci = []
geyser_plugin = [] # Disabled: solana-geyser-plugin-manager conflicts with litesvm 0.9.1
subgraph = ["surfpool-subgraph"]
register-tracing = ["litesvm/register-tracing"]
prometheus = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:opentelemetry-prometheus", "dep:prometheus", "dep:axum"]
1 change: 1 addition & 0 deletions crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub mod runloops;
pub mod scenarios;
pub mod storage;
pub mod surfnet;
pub mod telemetry;
pub mod types;

use crossbeam_channel::{Receiver, Sender};
Expand Down
57 changes: 56 additions & 1 deletion crates/core/src/rpc/admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::time::Duration;
use jsonrpc_core::{BoxFuture, Result};
use jsonrpc_derive::rpc;
use solana_client::rpc_custom_error::RpcCustomError;
use surfpool_types::{SimnetCommand, SimnetEvent};
use surfpool_types::{SimnetCommand, SimnetEvent, SurfpoolStatus, WsSubscriptions};
use txtx_addon_network_svm_types::subgraph::PluginConfig;
use uuid::Uuid;

Expand Down Expand Up @@ -192,6 +192,9 @@ pub trait AdminRpc {
/// - This method is useful for monitoring system uptime and verifying system health.
#[rpc(meta, name = "startTime")]
fn start_time(&self, meta: Self::Metadata) -> Result<String>;

#[rpc(meta, name = "surfpoolStatus")]
fn surfpool_status(&self, meta: Self::Metadata) -> Result<SurfpoolStatus>;
}

pub struct SurfpoolAdminRpc;
Expand Down Expand Up @@ -363,4 +366,56 @@ impl AdminRpc for SurfpoolAdminRpc {
let datetime_utc: chrono::DateTime<chrono::Utc> = system_time.into();
Ok(datetime_utc.to_rfc3339())
}
fn surfpool_status(&self, meta: Self::Metadata) -> Result<SurfpoolStatus> {
// Ensure we have RunloopContext metadata
let Some(ctx) = meta else {
return Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: None,
}
.into());
};

// Read a snapshot of SVM state under a reader lock
// Read a consistent snapshot of SVM state
let status = ctx.svm_locker.with_svm_reader(|svm| {
// Epoch / slot info
let slot = svm.latest_epoch_info.absolute_slot;
let epoch = svm.latest_epoch_info.epoch;
let slot_index = svm.latest_epoch_info.slot_index;

// transactions_count via Storage::count(); fall back to 0 on error
let transactions_count = svm.transactions.count().unwrap_or(0);

// monotonic processed counter
let transactions_processed = svm.transactions_processed;

// subscription counts (in-memory collections)
let signature_subscriptions = svm.signature_subscriptions.len();
let account_subscriptions = svm.account_subscriptions.len();
let slot_subscriptions = svm.slot_subscriptions.len();
let logs_subscriptions = svm.logs_subscriptions.len();

// uptime in ms
let uptime_ms = match std::time::SystemTime::now().duration_since(svm.start_time) {
Ok(d) => d.as_millis() as u64,
Err(_) => 0,
};

SurfpoolStatus {
slot,
epoch,
slot_index,
transactions_count,
transactions_processed,
uptime_ms,
ws_subscriptions: WsSubscriptions {
signatures: signature_subscriptions,
accounts: account_subscriptions,
slots: slot_subscriptions,
logs: logs_subscriptions,
},
}
});
Ok(status)
}
}
22 changes: 22 additions & 0 deletions crates/core/src/surfnet/locker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ use solana_transaction_status::{
TransactionConfirmationStatus as SolanaTransactionConfirmationStatus, UiConfirmedBlock,
UiTransactionEncoding,
};
#[cfg(feature = "prometheus")]
use surfpool_types::MetricsData;
use surfpool_types::{
AccountSnapshot, ComputeUnitsEstimationResult, ExecutionCapture, ExportSnapshotConfig, Idl,
KeyedProfileResult, ProfileResult, RpcProfileResultConfig, RunbookExecutionStatusReport,
Expand Down Expand Up @@ -1043,6 +1045,26 @@ impl SurfnetSvmLocker {
self.with_svm_writer(|svm_writer| {
svm_writer.write_executed_profile_result(signature, profile_result)
})?;

#[cfg(feature = "prometheus")]
{
let metric_data = self.with_svm_reader(|svm| MetricsData {
slot: svm.latest_epoch_info.absolute_slot,
epoch: svm.latest_epoch_info.epoch,
slot_index: svm.latest_epoch_info.slot_index,
transactions_count: svm.transactions.count().unwrap_or(0) as usize,
transactions_processed: svm.transactions_processed,
start_time: svm.start_time,
signature_subs: svm.signature_subscriptions.len(),
account_subs: svm.account_subscriptions.len(),
slot_subs: svm.slot_subscriptions.len(),
logs_subs: svm.logs_subscriptions.len(),
});
let _ = self
.simnet_events_tx()
.send(SimnetEvent::MetricsData(metric_data));
}

Ok(())
}

Expand Down
Loading