From 1b63f015e6a1c9c28b64f4468ec855a951f343cb Mon Sep 17 00:00:00 2001 From: Ryan Patterson Date: Sat, 28 Feb 2026 19:28:43 +0800 Subject: [PATCH] fix stale daemon cleanup --- libshpool/src/daemon/mod.rs | 8 +++++++ libshpool/src/daemonize.rs | 23 ++++++++++++++---- shpool/tests/daemon.rs | 47 ++++++++++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/libshpool/src/daemon/mod.rs b/libshpool/src/daemon/mod.rs index 5f18e473..1f014db6 100644 --- a/libshpool/src/daemon/mod.rs +++ b/libshpool/src/daemon/mod.rs @@ -65,6 +65,14 @@ pub fn run( } Err(e) => { info!("no systemd activation socket: {:?}", e); + // If a stale socket file exists (file on disk, nothing listening), + // remove it before binding so we don't get EADDRINUSE. + if let Err(connect_err) = std::os::unix::net::UnixStream::connect(&socket) { + if connect_err.kind() == std::io::ErrorKind::ConnectionRefused { + info!("removing stale socket file at {:?}", socket); + std::fs::remove_file(&socket).context("removing stale socket before bind")?; + } + } (Some(socket.clone()), UnixListener::bind(&socket).context("binding to socket")?) } }; diff --git a/libshpool/src/daemonize.rs b/libshpool/src/daemonize.rs index 0fde9416..867aa5a6 100644 --- a/libshpool/src/daemonize.rs +++ b/libshpool/src/daemonize.rs @@ -33,11 +33,24 @@ where { let control_sock = control_sock.as_ref(); - if UnixStream::connect(control_sock).is_ok() { - info!("daemon already running on {:?}, no need to autodaemonize", control_sock); - // There is already a daemon listening on the control socket, we - // don't need to do anything. - return Ok(()); + match UnixStream::connect(control_sock) { + Ok(_) => { + info!("daemon already running on {:?}, no need to autodaemonize", control_sock); + // There is already a daemon listening on the control socket, we + // don't need to do anything. + return Ok(()); + } + Err(e) if e.kind() == std::io::ErrorKind::ConnectionRefused => { + // The socket file exists but nothing is listening (stale socket). + // Remove it so the new daemon can bind successfully. + info!("stale socket at {:?}, removing before autodaemonizing", control_sock); + std::fs::remove_file(control_sock) + .with_context(|| format!("removing stale socket at {:?}", control_sock))?; + } + Err(_) => { + // Socket file does not exist or other error; fall through to spawn + // daemon. + } } info!("no daemon running on {:?}, autodaemonizing", control_sock); diff --git a/shpool/tests/daemon.rs b/shpool/tests/daemon.rs index 0957ba48..5a041baf 100644 --- a/shpool/tests/daemon.rs +++ b/shpool/tests/daemon.rs @@ -1,7 +1,10 @@ use std::{ fmt::Write, io::Read, - os::unix::{net::UnixListener, process::CommandExt as _}, + os::unix::{ + net::{UnixListener, UnixStream}, + process::CommandExt as _, + }, path, process::{Command, Stdio}, time, @@ -254,6 +257,48 @@ fn cleanup_socket() -> anyhow::Result<()> { Ok(()) } +#[test] +#[timeout(30000)] +fn stale_socket_autodaemonize() -> anyhow::Result<()> { + let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test")?; + let socket_path = tmp_dir.path().join("shpool.socket"); + + // Create a stale socket: UnixListener::drop() closes the fd but does NOT + // remove the socket file on Linux/macOS, leaving a dead file on disk. + { + let _listener = UnixListener::bind(&socket_path).context("binding stale socket")?; + } + assert!(socket_path.exists(), "stale socket file should persist after listener drop"); + assert!( + matches!( + UnixStream::connect(&socket_path).map_err(|e| e.kind()), + Err(std::io::ErrorKind::ConnectionRefused) + ), + "expected ConnectionRefused on stale socket" + ); + + // shpool list --daemonize should succeed despite the stale socket file. + // Without the fix this times out with "control socket never came up". + let log_file = tmp_dir.path().join("shpool.log"); + let out = Command::new(support::shpool_bin()?) + .arg("--daemonize") + .arg("--socket") + .arg(&socket_path) + .arg("--log-file") + .arg(&log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("list") + .output() + .context("running shpool list --daemonize")?; + + // Best-effort cleanup of the background daemon spawned by --daemonize. + Command::new("pkill").arg("-f").arg(socket_path.to_string_lossy().as_ref()).output().ok(); + + assert!(out.status.success(), "shpool list should succeed despite stale socket"); + Ok(()) +} + #[test] #[timeout(30000)] fn echo_sentinel() -> anyhow::Result<()> {