diff --git a/docs/DEV_FEE_TIMEOUT_SAFETY.md b/docs/DEV_FEE_TIMEOUT_SAFETY.md new file mode 100644 index 00000000..8e4a26ee --- /dev/null +++ b/docs/DEV_FEE_TIMEOUT_SAFETY.md @@ -0,0 +1,94 @@ +# Dev Fee Timeout Safety — Duplicate Payment Prevention + +## Overview + +When a dev fee Lightning payment times out, Mostrod queries the LN node for +the actual payment status before deciding whether to reset and retry. This +prevents duplicate payments caused by race conditions between timeouts and +in-flight payments. + +## Problem + +Originally, when the 50-second timeout expired during a dev fee payment, the +code unconditionally reset `dev_fee_paid = false` and cleared the payment hash. +However, a timeout does not mean the payment failed — the Lightning payment +could still be in-flight or may have succeeded after the timeout window. + +This created a race condition: + +1. Payment initiated, times out locally (but still in-flight on LN) +2. Code resets order to unpaid state +3. Original payment succeeds on LN (but local state already reset) +4. Next scheduler cycle finds the order as "unpaid" and initiates a second payment +5. **Result: double payment** + +See [Issue #568](https://github.com/MostroP2P/mostro/issues/568) for full details. + +## Solution + +### Two-phase payment flow (`src/app/release.rs`, `src/scheduler.rs`) + +The dev fee payment is split into two phases: + +1. **Resolve phase** (`resolve_dev_fee_invoice`): LNURL resolution + invoice + decode to extract the real LN payment hash. +2. **Send phase** (`send_dev_fee_payment`): Sends the pre-resolved invoice via + LND. + +The real payment hash is stored in `dev_fee_payment_hash` **before** the payment +is dispatched. This ensures that on timeout or crash, the hash is always +available for querying LND. + +### `LndConnector::check_payment_status()` (`src/lightning/mod.rs`) + +Queries the LN node for the current status of a payment using `TrackPaymentV2`. +Returns the LND `PaymentStatus` enum (Succeeded, InFlight, Failed, Unknown). + +### `check_dev_fee_payment_status()` (`src/scheduler.rs`) + +Helper that: +1. Extracts the payment hash from the order (skips `PENDING-` markers, which + are legacy placeholders that cannot be tracked on LND) +2. Decodes the hex hash to bytes +3. Queries LND with a 10-second timeout +4. If payment succeeded: marks order as paid in DB +5. Returns a `DevFeePaymentState` enum for the caller + +With the two-phase flow, new payments always have a real hash stored before +sending, so step 1 passes through to the LND query. The `PENDING-` guard +remains only for backward compatibility with legacy markers from before this +change — those correctly return `DevFeePaymentState::Unknown` since there is +genuinely no trackable hash. + +### Timeout handler in `job_process_dev_fee_payment()` (`src/scheduler.rs`) + +Instead of unconditionally resetting on timeout: + +| LN Payment Status | Action | +|-------------------|--------| +| **Succeeded** | Mark as paid in DB, do NOT reset | +| **InFlight** | Skip reset, leave state intact (payment may still complete) | +| **Failed** | Safe to reset `dev_fee_paid = false` and retry | +| **Unknown** | Skip reset to err on the side of caution (avoid duplicate) | + +### Stale real-hash cleanup (`src/scheduler.rs`) + +A cleanup pass runs each cycle for orders that have `dev_fee_paid = true` and a +real (non-PENDING) payment hash. This handles crash recovery: if the process +crashes between storing the hash and receiving LND confirmation, the cleanup +queries LND and resets failed payments for retry. + +### Design Principle + +**When in doubt, don't retry.** A missed dev fee payment can be recovered +manually, but a duplicate payment is money lost. The code errs on the side of +caution — only resetting when the LN node confirms the payment definitively +failed. + +## Related + +- Issue: [#568](https://github.com/MostroP2P/mostro/issues/568) +- Dev fee invoice resolution: `src/app/release.rs` (`resolve_dev_fee_invoice`) +- Dev fee payment send: `src/app/release.rs` (`send_dev_fee_payment`) +- Dev fee scheduler: `src/scheduler.rs` (`job_process_dev_fee_payment`) +- Dev fee documentation: `docs/DEV_FEE.md` diff --git a/src/app/release.rs b/src/app/release.rs index 6f775780..7ca1cba9 100644 --- a/src/app/release.rs +++ b/src/app/release.rs @@ -2,12 +2,13 @@ use crate::config; use crate::config::constants::DEV_FEE_LIGHTNING_ADDRESS; use crate::config::MOSTRO_DB_PASSWORD; use crate::db::{self}; +use crate::lightning::invoice::decode_invoice; use crate::lightning::LndConnector; use crate::lnurl::resolv_ln_address; use crate::nip33::{new_order_event, order_to_tags}; use crate::util::{ - enqueue_order_msg, get_keys, get_nostr_client, get_order, settle_seller_hold_invoice, - update_order_event, + bytes_to_string, enqueue_order_msg, get_keys, get_nostr_client, get_order, + settle_seller_hold_invoice, update_order_event, }; use argon2::password_hash::SaltString; @@ -553,23 +554,22 @@ async fn payment_success( Ok(()) } -/// Send development fee payment via Lightning Network +/// Resolve the dev fee invoice and extract the real payment hash. /// -/// Attempts to pay the configured development fee for a completed order. -/// Uses LNURL resolution to get payment invoice, then sends payment via LND. +/// Performs LNURL resolution to get a BOLT11 invoice, then decodes it +/// to extract the payment hash. This allows storing the real hash in the +/// database *before* sending the payment, enabling LN status checks on +/// timeout or crash recovery. /// /// # Timeouts /// - LNURL resolution: 15 seconds -/// - send_payment call: 5 seconds -/// - Payment result wait: 25 seconds -/// - Total: 45 seconds maximum /// /// # Returns -/// - `Ok(String)`: Payment hash on successful payment -/// - `Err(MostroError)`: Error if payment fails or times out -pub async fn send_dev_fee_payment(order: &Order) -> Result { +/// - `Ok((payment_request, payment_hash_hex))` on success +/// - `Err(MostroError)` if resolution or decoding fails +pub async fn resolve_dev_fee_invoice(order: &Order) -> Result<(String, String), MostroError> { info!( - "Initiating dev fee payment for order {} - amount: {} sats to {}", + "Resolving dev fee invoice for order {} - amount: {} sats to {}", order.id, order.dev_fee, DEV_FEE_LIGHTNING_ADDRESS ); @@ -577,7 +577,7 @@ pub async fn send_dev_fee_payment(order: &Order) -> Result return Err(MostroInternalErr(ServiceError::WrongAmountError)); } - // Step 1: LNURL resolution (15s timeout) + // LNURL resolution (15s timeout) let payment_request = tokio::time::timeout( std::time::Duration::from_secs(15), resolv_ln_address(DEV_FEE_LIGHTNING_ADDRESS, order.dev_fee as u64), @@ -598,14 +598,60 @@ pub async fn send_dev_fee_payment(order: &Order) -> Result e })?; - // Step 2: Create LND connector + if payment_request.is_empty() { + error!( + "Dev fee LNURL resolution returned empty invoice for order {} ({} sats)", + order.id, order.dev_fee + ); + return Err(MostroInternalErr(ServiceError::LnAddressParseError)); + } + // Decode invoice and extract payment hash + let invoice = decode_invoice(&payment_request)?; + let payment_hash_hex = bytes_to_string(invoice.payment_hash().as_ref()); + + info!( + "Resolved dev fee invoice for order {} - hash: {}", + order.id, payment_hash_hex + ); + + Ok((payment_request, payment_hash_hex)) +} + +/// Send development fee payment via Lightning Network +/// +/// Sends a pre-resolved invoice payment via LND. The caller must first +/// call `resolve_dev_fee_invoice` to obtain the payment request and store +/// the payment hash in the database. +/// +/// # Timeouts +/// - send_payment call: 5 seconds +/// - Payment result wait: 25 seconds +/// - Total: 30 seconds maximum +/// +/// # Returns +/// - `Ok(String)`: Payment hash on successful payment +/// - `Err(MostroError)`: Error if payment fails or times out +pub async fn send_dev_fee_payment( + order: &Order, + payment_request: &str, +) -> Result { + info!( + "Sending dev fee payment for order {} - amount: {} sats", + order.id, order.dev_fee + ); + + if order.dev_fee <= 0 { + return Err(MostroInternalErr(ServiceError::WrongAmountError)); + } + + // Step 1: Create LND connector let mut ln_client = LndConnector::new().await?; let (tx, mut rx) = channel(100); - // Step 3: Send payment (5s timeout to prevent hanging) + // Step 2: Send payment (5s timeout to prevent hanging) tokio::time::timeout( std::time::Duration::from_secs(5), - ln_client.send_payment(&payment_request, order.dev_fee, tx), + ln_client.send_payment(payment_request, order.dev_fee, tx), ) .await .map_err(|_| { @@ -625,7 +671,7 @@ pub async fn send_dev_fee_payment(order: &Order) -> Result e })?; - // Step 4: Wait for payment result (25s timeout) + // Step 3: Wait for payment result (25s timeout) // Loop to receive multiple status messages from LND until terminal status let payment_result = tokio::time::timeout(std::time::Duration::from_secs(25), async { while let Some(msg) = rx.recv().await { @@ -671,7 +717,7 @@ pub async fn send_dev_fee_payment(order: &Order) -> Result MostroInternalErr(ServiceError::LnPaymentError("result timeout".to_string())) })??; // Double ? to unwrap both timeout Result and inner Result - // Step 5: Log and return the successful payment hash + // Step 4: Log and return the successful payment hash info!( "Dev fee payment succeeded for order {} - amount: {} sats, hash: {}", order.id, order.dev_fee, payment_result diff --git a/src/db.rs b/src/db.rs index bd20432f..5779db2e 100644 --- a/src/db.rs +++ b/src/db.rs @@ -903,7 +903,6 @@ pub async fn find_unpaid_dev_fees(pool: &SqlitePool) -> Result, Mostr WHERE (status = 'settled-hold-invoice' OR status = 'success') AND dev_fee > 0 AND dev_fee_paid = 0 - AND (dev_fee_payment_hash IS NULL OR dev_fee_payment_hash NOT LIKE 'PENDING-%') "#, ) .fetch_all(pool) diff --git a/src/lightning/mod.rs b/src/lightning/mod.rs index 153b1197..89c3ece4 100644 --- a/src/lightning/mod.rs +++ b/src/lightning/mod.rs @@ -262,6 +262,47 @@ impl LndConnector { Ok(()) } + /// Query the current status of a payment by its hash. + /// + /// Returns the LND `PaymentStatus` if the payment is found, or an error + /// if the payment cannot be tracked (e.g., unknown hash). + pub async fn check_payment_status( + &mut self, + payment_hash: &[u8], + ) -> Result { + let track_req = TrackPaymentRequest { + payment_hash: payment_hash.to_vec(), + no_inflight_updates: false, + }; + + let mut stream = self + .client + .router() + .track_payment_v2(track_req) + .await + .map_err(|e| MostroInternalErr(ServiceError::LnPaymentError(e.to_string())))? + .into_inner(); + + // Get the first (current) status update + match stream.message().await { + Ok(Some(payment)) => fedimint_tonic_lnd::lnrpc::payment::PaymentStatus::try_from( + payment.status, + ) + .map_err(|_| { + MostroInternalErr(ServiceError::LnPaymentError( + "Unknown payment status".to_string(), + )) + }), + Ok(None) => Err(MostroInternalErr(ServiceError::LnPaymentError( + "No payment status received (stream ended)".to_string(), + ))), + Err(e) => Err(MostroInternalErr(ServiceError::LnPaymentError(format!( + "Failed to get payment status: {}", + e + )))), + } + } + pub async fn get_node_info(&mut self) -> Result { let info = self.client.lightning().get_info(GetInfoRequest {}).await; diff --git a/src/scheduler.rs b/src/scheduler.rs index e8c35492..922e4f3e 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -1,4 +1,4 @@ -use crate::app::release::{do_payment, send_dev_fee_payment}; +use crate::app::release::{do_payment, resolve_dev_fee_invoice, send_dev_fee_payment}; use crate::bitcoin_price::BitcoinPriceManager; use crate::config; use crate::db::*; @@ -556,6 +556,12 @@ async fn job_process_dev_fee_payment() { let pool = get_db_pool(); let interval = 60u64; // Every 60 seconds + let mut ln_client = if let Ok(client) = LndConnector::new().await { + client + } else { + return error!("Failed to create LND client for dev fee payment job"); + }; + tokio::spawn(async move { loop { info!("Checking for unpaid development fees"); @@ -640,6 +646,49 @@ async fn job_process_dev_fee_payment() { } } + // Cleanup stale real-hash entries (crash recovery) + // These orders have a real payment hash stored before sending, but the + // payment may not have completed (e.g. crash between storing hash and + // receiving LND confirmation). Check LND for actual status. + if let Ok(real_hash_orders) = sqlx::query_as::<_, Order>( + "SELECT * FROM orders + WHERE dev_fee_paid = 1 + AND dev_fee_payment_hash IS NOT NULL + AND dev_fee_payment_hash NOT LIKE 'PENDING-%' + AND (status = 'settled-hold-invoice' OR status = 'success')", + ) + .fetch_all(&*pool) + .await + { + for mut real_hash_order in real_hash_orders { + let order_id = real_hash_order.id; + match check_dev_fee_payment_status(&real_hash_order, &pool, &mut ln_client) + .await + { + DevFeePaymentState::Succeeded => { + // Already handled by check_dev_fee_payment_status + } + DevFeePaymentState::Failed => { + info!( + "Stale dev fee payment failed for order {}, resetting for retry", + order_id + ); + real_hash_order.dev_fee_paid = false; + real_hash_order.dev_fee_payment_hash = None; + if let Err(e) = real_hash_order.update(&pool).await { + error!( + "Failed to reset stale failed payment for order {}: {:?}", + order_id, e + ); + } + } + DevFeePaymentState::InFlight | DevFeePaymentState::Unknown => { + // Leave alone - payment may still complete + } + } + } + } + // Query unpaid orders if let Ok(unpaid_orders) = find_unpaid_dev_fees(&pool).await { info!("Found {} orders with unpaid dev fees", unpaid_orders.len()); @@ -682,29 +731,59 @@ async fn job_process_dev_fee_payment() { continue; // Skip payment attempt } - // STEP 1: Pre-mark as paid to prevent duplicate attempts + // STEP 1: Resolve invoice and extract real payment hash let order_id = order.id; - info!("Pre-marking order {} as payment pending", order_id); + info!("Resolving dev fee invoice for order {}", order_id); + + let (payment_request, payment_hash_hex) = match tokio::time::timeout( + std::time::Duration::from_secs(20), + resolve_dev_fee_invoice(&order), + ) + .await + { + Ok(Ok(result)) => result, + Ok(Err(e)) => { + error!( + "Failed to resolve dev fee invoice for order {}: {:?}", + order_id, e + ); + continue; // Skip this order, will retry next cycle + } + Err(_) => { + error!( + "Dev fee invoice resolution timeout (20s) for order {}", + order_id + ); + continue; // Skip this order, will retry next cycle + } + }; + + // STEP 2: Store real payment hash before sending + info!( + "Storing payment hash {} for order {}", + payment_hash_hex, order_id + ); order.dev_fee_paid = true; - let pending_ts = Utc::now().timestamp() as u64; - order.dev_fee_payment_hash = - Some(format!("PENDING-{}-{}", order_id, pending_ts)); + order.dev_fee_payment_hash = Some(payment_hash_hex.clone()); let mut order = match order.update(&pool).await { Err(e) => { - error!("Failed to pre-mark dev fee for order {}: {:?}", order_id, e); + error!( + "Failed to store payment hash for order {}: {:?}", + order_id, e + ); continue; // Skip this order, will retry next cycle } Ok(updated_order) => { - info!("Order {} marked as payment pending", order_id); + info!("Order {} marked with real payment hash", order_id); updated_order } }; - // STEP 2: Attempt payment (protected from retry by dev_fee_paid = true) + // STEP 3: Send payment with pre-resolved invoice match tokio::time::timeout( std::time::Duration::from_secs(50), - send_dev_fee_payment(&order), + send_dev_fee_payment(&order, &payment_request), ) .await { @@ -712,10 +791,16 @@ async fn job_process_dev_fee_payment() { let order_id = order.id; let dev_fee_amount = order.dev_fee; - // STEP 3: Update with actual payment hash - order.dev_fee_payment_hash = Some(payment_hash.clone()); + // STEP 4: Verify hash matches, use LND's value as authoritative + if order.dev_fee_payment_hash.as_deref() != Some(&payment_hash) { + warn!( + "Order {}: LND returned hash '{}' differs from stored hash '{:?}', using LND's value", + order_id, payment_hash, order.dev_fee_payment_hash + ); + order.dev_fee_payment_hash = Some(payment_hash.clone()); + } - info!("Payment succeeded for order {}, updating hash", order_id); + info!("Payment succeeded for order {}, verifying DB", order_id); match order.update(&pool).await { Err(e) => { @@ -768,7 +853,7 @@ async fn job_process_dev_fee_payment() { } } Ok(Err(e)) => { - // STEP 4: Payment failed, reset to unpaid for retry + // STEP 5: Payment failed, reset to unpaid for retry let order_id = order.id; error!( "Dev fee payment failed for order {} - error: {:?}", @@ -797,26 +882,74 @@ async fn job_process_dev_fee_payment() { } } Err(_) => { - // STEP 5: Timeout — DO NOT reset to unpaid. - // - // A timeout does NOT mean the payment failed. The Lightning - // payment may still be in-flight and could succeed after the - // timeout window. Resetting dev_fee_paid=false here would - // cause a duplicate payment on the next scheduler cycle. - // - // Instead, leave the order in PENDING state (dev_fee_paid=true, - // dev_fee_payment_hash="PENDING-{uuid}-{ts}"). The stale - // PENDING detection (with timestamp-based TTL) will clean it - // up after the configured TTL if the payment never resolves. - // - // See: https://github.com/MostroP2P/mostro/issues/568 + // STEP 6: Timeout — check actual payment status before resetting + // A timeout does NOT mean the payment failed; it could still be + // in-flight or may have succeeded. Blindly resetting would cause + // duplicate payments (see #568). let order_id = order.id; let dev_fee = order.dev_fee; warn!( - "Dev fee payment timeout (50s) for order {} ({} sats). \ - Leaving in PENDING state — stale cleanup will handle if payment never resolves.", + "Dev fee payment timeout (50s) for order {} ({} sats), checking LN status", order_id, dev_fee ); + + // Try to check the payment status on the LN node + let should_reset = match check_dev_fee_payment_status( + &order, + &pool, + &mut ln_client, + ) + .await + { + DevFeePaymentState::Succeeded => { + info!( + "Payment actually succeeded for order {} despite timeout", + order_id + ); + false // Don't reset — already handled + } + DevFeePaymentState::InFlight => { + warn!( + "Payment still in-flight for order {}, skipping reset", + order_id + ); + false // Don't reset — payment may still complete + } + DevFeePaymentState::Failed => { + info!( + "Payment definitively failed for order {}, safe to retry", + order_id + ); + true // Safe to reset and retry + } + DevFeePaymentState::Unknown => { + warn!( + "Cannot determine payment status for order {}, skipping reset to avoid duplicate", + order_id + ); + false // Err on the side of caution + } + }; + + if should_reset { + order.dev_fee_paid = false; + order.dev_fee_payment_hash = None; + + match order.update(&pool).await { + Err(e) => { + error!( + "Failed to reset after timeout for order {}: {:?}", + order_id, e + ); + } + Ok(_) => { + info!( + "Reset order {} to unpaid after confirmed failure, will retry", + order_id + ); + } + } + } } } } @@ -827,6 +960,101 @@ async fn job_process_dev_fee_payment() { }); } +/// Possible states of a dev fee payment after checking the LN node. +enum DevFeePaymentState { + /// Payment confirmed successful on the LN node. + Succeeded, + /// Payment is still in-flight on the LN network. + InFlight, + /// Payment definitively failed — safe to retry. + Failed, + /// Could not determine status (LN node unreachable, unknown hash, etc.) + Unknown, +} + +/// Check the actual payment status on the LN node for a dev fee payment. +/// +/// If the payment succeeded, marks the order as paid in the DB. +/// Returns the current payment state so the caller can decide whether to reset. +async fn check_dev_fee_payment_status( + order: &Order, + pool: &sqlx::Pool, + ln_client: &mut LndConnector, +) -> DevFeePaymentState { + use fedimint_tonic_lnd::lnrpc::payment::PaymentStatus; + + // Get the payment hash — if it's a PENDING marker or missing, we can't check + let payment_hash_str = match &order.dev_fee_payment_hash { + Some(h) if !h.starts_with("PENDING-") => h.clone(), + _ => { + warn!( + "Order {} has no trackable payment hash, cannot verify LN status", + order.id + ); + return DevFeePaymentState::Unknown; + } + }; + + // Decode hex hash to bytes + use nostr_sdk::nostr::hashes::hex::FromHex; + let payment_hash_bytes: Vec = match FromHex::from_hex(&payment_hash_str) { + Ok(bytes) => bytes, + Err(e) => { + error!( + "Failed to decode payment hash '{}' for order {}: {}", + payment_hash_str, order.id, e + ); + return DevFeePaymentState::Unknown; + } + }; + + // Query LND for the payment status + match tokio::time::timeout( + std::time::Duration::from_secs(10), + ln_client.check_payment_status(&payment_hash_bytes), + ) + .await + { + Ok(Ok(status)) => match status { + PaymentStatus::Succeeded => { + // Payment actually went through — update DB + let order_id = order.id; + let mut order = order.clone(); + order.dev_fee_paid = true; + if let Err(e) = order.update(pool).await { + error!( + "Payment succeeded but failed to update DB for order {}: {:?}", + order_id, e + ); + } else { + info!( + "✅ Order {} dev fee payment confirmed via LN status check", + order_id + ); + } + DevFeePaymentState::Succeeded + } + PaymentStatus::InFlight => DevFeePaymentState::InFlight, + PaymentStatus::Failed => DevFeePaymentState::Failed, + _ => DevFeePaymentState::Unknown, + }, + Ok(Err(e)) => { + warn!( + "LN status check failed for order {} (hash {}): {:?}", + order.id, payment_hash_str, e + ); + DevFeePaymentState::Unknown + } + Err(_) => { + warn!( + "LN status check timed out for order {} (hash {})", + order.id, payment_hash_str + ); + DevFeePaymentState::Unknown + } + } +} + #[cfg(test)] mod tests { use super::parse_pending_timestamp;