From bb4525bf3f193d9d8fbb99f1ac28c315ec32a7ea Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 13 Mar 2026 15:46:12 +0100 Subject: [PATCH 1/9] Take shutdown inhibitor lock for graceful service teardown Handle external shutdown events (ACPI power button, hypervisor shutdown) by listening to logind's PrepareForShutdown signal. A delay inhibitor lock is acquired on startup so Supervisor has time to gracefully stop all managed services before the host proceeds with shutdown. Changes: - Add inhibit() and prepare_for_shutdown() methods to Logind D-Bus interface - Enable Unix FD negotiation on the D-Bus message bus for inhibitor lock FDs - Add background monitor task in HostManager that listens for the signal - Track the monitor task and cancel it cleanly via new unload() method - Wire host unload into Core.stop() stage 2 for clean shutdown - Add PrepareForShutdown signal constant to dbus/const.py Co-Authored-By: Claude Opus 4.6 --- supervisor/core.py | 1 + supervisor/dbus/const.py | 3 ++ supervisor/dbus/logind.py | 19 +++++++- supervisor/dbus/manager.py | 2 +- supervisor/host/manager.py | 72 +++++++++++++++++++++++++++++- tests/dbus/test_login.py | 31 +++++++++++++ tests/dbus_service_mocks/logind.py | 16 +++++++ tests/host/test_manager.py | 59 +++++++++++++++++++++++- 8 files changed, 197 insertions(+), 6 deletions(-) diff --git a/supervisor/core.py b/supervisor/core.py index 0f77bd3b992..121d2168148 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -340,6 +340,7 @@ async def stop(self) -> None: self.sys_websession.close(), self.sys_ingress.unload(), self.sys_hardware.unload(), + self.sys_host.unload(), self.sys_dbus.unload(), ) ] diff --git a/supervisor/dbus/const.py b/supervisor/dbus/const.py index 8e49cc3f701..e31dc5228d6 100644 --- a/supervisor/dbus/const.py +++ b/supervisor/dbus/const.py @@ -48,6 +48,9 @@ DBUS_SIGNAL_NM_CONNECTION_ACTIVE_CHANGED = ( "org.freedesktop.NetworkManager.Connection.Active.StateChanged" ) +DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN = ( + "org.freedesktop.login1.Manager.PrepareForShutdown" +) DBUS_SIGNAL_PROPERTIES_CHANGED = "org.freedesktop.DBus.Properties.PropertiesChanged" DBUS_SIGNAL_RAUC_INSTALLER_COMPLETED = "de.pengutronix.rauc.Installer.Completed" diff --git a/supervisor/dbus/logind.py b/supervisor/dbus/logind.py index c3a1834f4ce..b62113f5eb9 100644 --- a/supervisor/dbus/logind.py +++ b/supervisor/dbus/logind.py @@ -5,7 +5,12 @@ from dbus_fast.aio.message_bus import MessageBus from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError -from .const import DBUS_NAME_LOGIND, DBUS_OBJECT_LOGIND +from ..utils.dbus import DBusSignalWrapper +from .const import ( + DBUS_NAME_LOGIND, + DBUS_OBJECT_LOGIND, + DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN, +) from .interface import DBusInterface from .utils import dbus_connected @@ -29,7 +34,7 @@ async def connect(self, bus: MessageBus): await super().connect(bus) except DBusError: _LOGGER.warning("Can't connect to systemd-logind") - except (DBusServiceUnkownError, DBusInterfaceError): + except DBusServiceUnkownError, DBusInterfaceError: _LOGGER.warning("No systemd-logind support on the host.") @dbus_connected @@ -41,3 +46,13 @@ async def reboot(self) -> None: async def power_off(self) -> None: """Power off host computer.""" await self.connected_dbus.Manager.call("power_off", False) + + @dbus_connected + async def inhibit(self, what: str, who: str, why: str, mode: str) -> int: + """Take an inhibitor lock. Returns a file descriptor.""" + return await self.connected_dbus.Manager.call("inhibit", what, who, why, mode) + + @dbus_connected + def prepare_for_shutdown(self) -> DBusSignalWrapper: + """Return a signal wrapper for PrepareForShutdown signal.""" + return self.connected_dbus.signal(DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN) diff --git a/supervisor/dbus/manager.py b/supervisor/dbus/manager.py index 201165f2819..ed417e31574 100644 --- a/supervisor/dbus/manager.py +++ b/supervisor/dbus/manager.py @@ -123,7 +123,7 @@ async def load(self) -> None: try: self._bus = connected_bus = await MessageBus( - bus_type=BusType.SYSTEM + bus_type=BusType.SYSTEM, negotiate_unix_fd=True ).connect() except Exception as err: raise DBusFatalError( diff --git a/supervisor/host/manager.py b/supervisor/host/manager.py index a8a94f5b928..35d45d97446 100644 --- a/supervisor/host/manager.py +++ b/supervisor/host/manager.py @@ -1,15 +1,17 @@ """Host function like audio, D-Bus or systemd.""" +import asyncio from contextlib import suppress from functools import lru_cache import logging +import os from typing import Self from awesomeversion import AwesomeVersion -from ..const import BusEvent +from ..const import BusEvent, CoreState from ..coresys import CoreSys, CoreSysAttributes -from ..exceptions import HassioError, HostLogError, PulseAudioError +from ..exceptions import DBusError, HassioError, HostLogError, PulseAudioError from ..hardware.const import PolicyGroup from ..hardware.data import Device from .apparmor import AppArmorControl @@ -38,6 +40,7 @@ def __init__(self, coresys: CoreSys): self._network: NetworkManager = NetworkManager(coresys) self._sound: SoundControl = SoundControl(coresys) self._logs: LogsControl = LogsControl(coresys) + self._shutdown_monitor_task: asyncio.Task | None = None async def post_init(self) -> Self: """Post init actions that must occur in event loop.""" @@ -180,6 +183,71 @@ async def load(self): except HassioError as err: _LOGGER.warning("Loading host AppArmor on start failed: %s", err) + # Start monitoring for host shutdown signals (ACPI power button, etc.) + if self.sys_dbus.logind.is_connected: + self._shutdown_monitor_task = self.sys_create_task( + self._monitor_host_shutdown() + ) + + async def unload(self) -> None: + """Shutdown host manager and cancel background tasks.""" + if self._shutdown_monitor_task and not self._shutdown_monitor_task.done(): + self._shutdown_monitor_task.cancel() + with suppress(asyncio.CancelledError): + await self._shutdown_monitor_task + self._shutdown_monitor_task = None + + async def _monitor_host_shutdown(self) -> None: + """Monitor for host shutdown via logind PrepareForShutdown signal. + + Takes an inhibitor lock to delay shutdown while we gracefully stop + all running services. When PrepareForShutdown fires, runs the graceful + shutdown sequence and then releases the lock so the host can proceed. + """ + try: + inhibit_fd: int = await self.sys_dbus.logind.inhibit( + "shutdown", + "Home Assistant Supervisor", + "Gracefully stopping running services", + "delay", + ) + except DBusError as err: + _LOGGER.warning( + "Could not take shutdown inhibitor lock from logind: %s", err + ) + return + + _LOGGER.info("Shutdown inhibitor lock acquired from logind") + + try: + async with self.sys_dbus.logind.prepare_for_shutdown() as signal: + while True: + msg = await signal.wait_for_signal() + active = msg[0] + if not active: + continue + + # Only handle if Supervisor didn't initiate the shutdown + if self.sys_core.state != CoreState.RUNNING: + _LOGGER.debug( + "PrepareForShutdown received but already in state %s", + self.sys_core.state, + ) + break + + _LOGGER.info( + "Host shutdown/reboot detected, gracefully stopping services" + ) + await self.sys_core.shutdown() + break + except (DBusError, OSError) as err: + _LOGGER.warning("Error monitoring host shutdown signal: %s", err) + finally: + if isinstance(inhibit_fd, int): + with suppress(OSError): + await self.sys_run_in_executor(os.close, inhibit_fd) + _LOGGER.info("Shutdown inhibitor lock released") + async def _hardware_events(self, device: Device) -> None: """Process hardware requests.""" if self.sys_hardware.policy.is_match_cgroup(PolicyGroup.AUDIO, device): diff --git a/tests/dbus/test_login.py b/tests/dbus/test_login.py index bada0476b7b..4054192220f 100644 --- a/tests/dbus/test_login.py +++ b/tests/dbus/test_login.py @@ -45,6 +45,37 @@ async def test_power_off(logind_service: LogindService, dbus_session_bus: Messag assert logind_service.PowerOff.calls == [(False,)] +async def test_inhibit(logind_service: LogindService, dbus_session_bus: MessageBus): + """Test taking an inhibitor lock.""" + logind_service.Inhibit.calls.clear() + logind = Logind() + + with pytest.raises(DBusNotConnectedError): + await logind.inhibit("shutdown", "test", "testing", "delay") + + await logind.connect(dbus_session_bus) + + await logind.inhibit("shutdown", "Test", "Testing inhibit", "delay") + assert logind_service.Inhibit.calls == [ + ("shutdown", "Test", "Testing inhibit", "delay") + ] + + +async def test_prepare_for_shutdown_signal( + logind_service: LogindService, dbus_session_bus: MessageBus +): + """Test PrepareForShutdown signal.""" + logind = Logind() + await logind.connect(dbus_session_bus) + + async with logind.prepare_for_shutdown() as signal: + logind_service.PrepareForShutdown() + await logind_service.ping() + + msg = await signal.wait_for_signal() + assert msg == [True] + + async def test_dbus_logind_connect_error( dbus_session_bus: MessageBus, caplog: pytest.LogCaptureFixture ): diff --git a/tests/dbus_service_mocks/logind.py b/tests/dbus_service_mocks/logind.py index fbd27f362a2..d7082f2813a 100644 --- a/tests/dbus_service_mocks/logind.py +++ b/tests/dbus_service_mocks/logind.py @@ -1,6 +1,10 @@ """Mock of logind dbus service.""" +import os +import tempfile + from dbus_fast import DBusError +from dbus_fast.service import signal from .base import DBusServiceMock, dbus_method @@ -31,3 +35,15 @@ def Reboot(self, interactive: "b") -> None: @dbus_method() def PowerOff(self, interactive: "b") -> None: """PowerOff.""" + + @dbus_method() + def Inhibit(self, what: "s", who: "s", why: "s", mode: "s") -> "h": + """Take an inhibitor lock. Returns a file descriptor.""" + fd, path = tempfile.mkstemp() + os.unlink(path) + return fd + + @signal() + def PrepareForShutdown(self) -> "b": + """Signal prepare for shutdown.""" + return True diff --git a/tests/host/test_manager.py b/tests/host/test_manager.py index b43963838cf..87d2f8033c7 100644 --- a/tests/host/test_manager.py +++ b/tests/host/test_manager.py @@ -1,18 +1,29 @@ """Test host manager.""" -from unittest.mock import patch +import asyncio +from unittest.mock import AsyncMock, patch from awesomeversion import AwesomeVersion import pytest +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.dbus.const import MulticastProtocolEnabled from tests.dbus_service_mocks.base import DBusServiceMock +from tests.dbus_service_mocks.logind import Logind as LogindService from tests.dbus_service_mocks.rauc import Rauc as RaucService from tests.dbus_service_mocks.systemd import Systemd as SystemdService +@pytest.fixture(name="logind_service") +async def fixture_logind_service( + all_dbus_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], +) -> LogindService: + """Return logind service mock.""" + yield all_dbus_services["logind"] + + @pytest.fixture(name="systemd_service") async def fixture_systemd_service( all_dbus_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], @@ -71,3 +82,49 @@ async def test_reload_os( await coresys.host.reload() assert rauc_service.GetSlotStatus.calls == [()] + + +async def test_host_shutdown_on_prepare_for_shutdown_signal( + coresys: CoreSys, logind_service: LogindService +): + """Test graceful shutdown when PrepareForShutdown signal is received.""" + shutdown_called = asyncio.Event() + + async def mock_shutdown(**kwargs): + shutdown_called.set() + + await coresys.host.load() + await coresys.core.set_state(CoreState.RUNNING) + + # Give the monitor task time to start and register the signal listener + # (needs multiple yields for inhibit D-Bus call + AddMatch call) + await asyncio.sleep(0.1) + + with patch.object(coresys.core, "shutdown", side_effect=mock_shutdown): + # Emit PrepareForShutdown(true) signal as if host is shutting down + logind_service.PrepareForShutdown() + await logind_service.ping() + + async with asyncio.timeout(2): + await shutdown_called.wait() + + +async def test_host_shutdown_signal_ignored_when_not_running( + coresys: CoreSys, logind_service: LogindService +): + """Test PrepareForShutdown is ignored if Supervisor already shutting down.""" + await coresys.host.load() + await coresys.core.set_state(CoreState.SHUTDOWN) + + # Give the monitor task time to start and register the signal listener + await asyncio.sleep(0.1) + + with patch.object( + coresys.core, "shutdown", new_callable=AsyncMock + ) as mock_shutdown: + logind_service.PrepareForShutdown() + await logind_service.ping() + # Give the monitor task time to process the signal + await asyncio.sleep(0.1) + + mock_shutdown.assert_not_called() From b4e5796846781da2ad160515b3251aa4c4386046 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 13 Mar 2026 16:51:13 +0100 Subject: [PATCH 2/9] Close fd in tests --- tests/dbus/test_login.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/dbus/test_login.py b/tests/dbus/test_login.py index 4054192220f..8c6fb388e1c 100644 --- a/tests/dbus/test_login.py +++ b/tests/dbus/test_login.py @@ -1,6 +1,8 @@ """Test login dbus interface.""" # pylint: disable=import-error +import os + from dbus_fast.aio.message_bus import MessageBus import pytest @@ -55,10 +57,12 @@ async def test_inhibit(logind_service: LogindService, dbus_session_bus: MessageB await logind.connect(dbus_session_bus) - await logind.inhibit("shutdown", "Test", "Testing inhibit", "delay") + fd = await logind.inhibit("shutdown", "Test", "Testing inhibit", "delay") assert logind_service.Inhibit.calls == [ ("shutdown", "Test", "Testing inhibit", "delay") ] + if fd is not None: + os.close(fd) async def test_prepare_for_shutdown_signal( From d8fe08b70ae60a02d1d1f1aa3cfc1655e82baeba Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 13 Mar 2026 16:53:58 +0100 Subject: [PATCH 3/9] Improve error handling in shutdown inhibit lock acquisition and release --- supervisor/host/manager.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/supervisor/host/manager.py b/supervisor/host/manager.py index 35d45d97446..6edfa0fecb5 100644 --- a/supervisor/host/manager.py +++ b/supervisor/host/manager.py @@ -11,7 +11,13 @@ from ..const import BusEvent, CoreState from ..coresys import CoreSys, CoreSysAttributes -from ..exceptions import DBusError, HassioError, HostLogError, PulseAudioError +from ..exceptions import ( + DBusError, + DBusNotConnectedError, + HassioError, + HostLogError, + PulseAudioError, +) from ..hardware.const import PolicyGroup from ..hardware.data import Device from .apparmor import AppArmorControl @@ -195,7 +201,7 @@ async def unload(self) -> None: self._shutdown_monitor_task.cancel() with suppress(asyncio.CancelledError): await self._shutdown_monitor_task - self._shutdown_monitor_task = None + self._shutdown_monitor_task = None async def _monitor_host_shutdown(self) -> None: """Monitor for host shutdown via logind PrepareForShutdown signal. @@ -211,7 +217,7 @@ async def _monitor_host_shutdown(self) -> None: "Gracefully stopping running services", "delay", ) - except DBusError as err: + except (DBusError, DBusNotConnectedError) as err: _LOGGER.warning( "Could not take shutdown inhibitor lock from logind: %s", err ) @@ -240,7 +246,7 @@ async def _monitor_host_shutdown(self) -> None: ) await self.sys_core.shutdown() break - except (DBusError, OSError) as err: + except (DBusError, DBusNotConnectedError, OSError) as err: _LOGGER.warning("Error monitoring host shutdown signal: %s", err) finally: if isinstance(inhibit_fd, int): From 2edd7425b356cb6ee17884486d06a0bb4e778e3c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 13 Mar 2026 17:25:34 +0100 Subject: [PATCH 4/9] Make core.shutdown() reentrant and remove monitor state guard Make Core.shutdown() reentrant using an asyncio.Event so that concurrent callers await the in-progress shutdown instead of starting a second one. This ensures the inhibitor lock is held until shutdown truly completes, even if PrepareForShutdown fires while an API-initiated shutdown is already in progress. The state guard in the monitor task is no longer needed since core.shutdown() now handles reentrance itself. Co-Authored-By: Claude Opus 4.6 --- supervisor/core.py | 42 ++++++++++++++++++++++++-------------- supervisor/host/manager.py | 10 +-------- tests/host/test_manager.py | 21 +++++++++++-------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/supervisor/core.py b/supervisor/core.py index 121d2168148..6e94f4ca0f8 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -42,6 +42,7 @@ def __init__(self, coresys: CoreSys) -> None: self.coresys: CoreSys = coresys self._state: CoreState = CoreState.INITIALIZE self.exit_code: int = 0 + self._shutdown_event: asyncio.Event = asyncio.Event() @property def state(self) -> CoreState: @@ -353,28 +354,39 @@ async def stop(self) -> None: self.sys_loop.stop() async def shutdown(self, *, remove_homeassistant_container: bool = False) -> None: - """Shutdown all running containers in correct order.""" + """Shutdown all running containers in correct order. + + Reentrant: if a shutdown is already in progress, subsequent calls + await completion of the existing shutdown rather than starting a second one. + """ + if self.state in (CoreState.SHUTDOWN, CoreState.STOPPING, CoreState.CLOSE): + await self._shutdown_event.wait() + return + # don't process scheduler anymore if self.state == CoreState.RUNNING: await self.set_state(CoreState.SHUTDOWN) - # Shutdown Application Add-ons, using Home Assistant API - await self.sys_addons.shutdown(AddonStartup.APPLICATION) + try: + # Shutdown Application Add-ons, using Home Assistant API + await self.sys_addons.shutdown(AddonStartup.APPLICATION) - # Close Home Assistant - with suppress(HassioError): - await self.sys_homeassistant.core.stop( - remove_container=remove_homeassistant_container - ) + # Close Home Assistant + with suppress(HassioError): + await self.sys_homeassistant.core.stop( + remove_container=remove_homeassistant_container + ) - # Shutdown System Add-ons - await self.sys_addons.shutdown(AddonStartup.SERVICES) - await self.sys_addons.shutdown(AddonStartup.SYSTEM) - await self.sys_addons.shutdown(AddonStartup.INITIALIZE) + # Shutdown System Add-ons + await self.sys_addons.shutdown(AddonStartup.SERVICES) + await self.sys_addons.shutdown(AddonStartup.SYSTEM) + await self.sys_addons.shutdown(AddonStartup.INITIALIZE) - # Shutdown all Plugins - if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN): - await self.sys_plugins.shutdown() + # Shutdown all Plugins + if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN): + await self.sys_plugins.shutdown() + finally: + self._shutdown_event.set() async def _update_last_boot(self) -> None: """Update last boot time.""" diff --git a/supervisor/host/manager.py b/supervisor/host/manager.py index 6edfa0fecb5..036e92e6b16 100644 --- a/supervisor/host/manager.py +++ b/supervisor/host/manager.py @@ -9,7 +9,7 @@ from awesomeversion import AwesomeVersion -from ..const import BusEvent, CoreState +from ..const import BusEvent from ..coresys import CoreSys, CoreSysAttributes from ..exceptions import ( DBusError, @@ -233,14 +233,6 @@ async def _monitor_host_shutdown(self) -> None: if not active: continue - # Only handle if Supervisor didn't initiate the shutdown - if self.sys_core.state != CoreState.RUNNING: - _LOGGER.debug( - "PrepareForShutdown received but already in state %s", - self.sys_core.state, - ) - break - _LOGGER.info( "Host shutdown/reboot detected, gracefully stopping services" ) diff --git a/tests/host/test_manager.py b/tests/host/test_manager.py index 87d2f8033c7..b5264dfb66a 100644 --- a/tests/host/test_manager.py +++ b/tests/host/test_manager.py @@ -1,7 +1,7 @@ """Test host manager.""" import asyncio -from unittest.mock import AsyncMock, patch +from unittest.mock import patch from awesomeversion import AwesomeVersion import pytest @@ -109,22 +109,25 @@ async def mock_shutdown(**kwargs): await shutdown_called.wait() -async def test_host_shutdown_signal_ignored_when_not_running( +async def test_host_shutdown_signal_reentrant( coresys: CoreSys, logind_service: LogindService ): - """Test PrepareForShutdown is ignored if Supervisor already shutting down.""" + """Test PrepareForShutdown during in-progress shutdown awaits same shutdown.""" + shutdown_called = asyncio.Event() + + async def mock_shutdown(**kwargs): + shutdown_called.set() + await coresys.host.load() await coresys.core.set_state(CoreState.SHUTDOWN) # Give the monitor task time to start and register the signal listener await asyncio.sleep(0.1) - with patch.object( - coresys.core, "shutdown", new_callable=AsyncMock - ) as mock_shutdown: + with patch.object(coresys.core, "shutdown", side_effect=mock_shutdown): logind_service.PrepareForShutdown() await logind_service.ping() - # Give the monitor task time to process the signal - await asyncio.sleep(0.1) - mock_shutdown.assert_not_called() + # shutdown() is called reentrantly - it awaits the in-progress shutdown + async with asyncio.timeout(2): + await shutdown_called.wait() From 1294d919ea43cfb65f547c33015a893669c89402 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 13 Mar 2026 17:40:28 +0100 Subject: [PATCH 5/9] Reset shutdown event for backup restore --- supervisor/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/supervisor/core.py b/supervisor/core.py index 6e94f4ca0f8..0fd57985a47 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -363,6 +363,9 @@ async def shutdown(self, *, remove_homeassistant_container: bool = False) -> Non await self._shutdown_event.wait() return + # Reset event for this shutdown cycle (supports repeated use, e.g. backup restore) + self._shutdown_event.clear() + # don't process scheduler anymore if self.state == CoreState.RUNNING: await self.set_state(CoreState.SHUTDOWN) From 67ff4635eb4a38bc26a6856e076e7bb3f12ecad4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 16 Mar 2026 13:36:22 +0100 Subject: [PATCH 6/9] Increase test coverage --- tests/host/test_manager.py | 98 +++++++++++++++++++++++++++++++++++++- tests/test_core.py | 65 +++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) diff --git a/tests/host/test_manager.py b/tests/host/test_manager.py index b5264dfb66a..bdf23d4a09c 100644 --- a/tests/host/test_manager.py +++ b/tests/host/test_manager.py @@ -1,7 +1,8 @@ """Test host manager.""" import asyncio -from unittest.mock import patch +import os +from unittest.mock import AsyncMock, PropertyMock, patch from awesomeversion import AwesomeVersion import pytest @@ -9,6 +10,7 @@ from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.dbus.const import MulticastProtocolEnabled +from supervisor.exceptions import DBusError from tests.dbus_service_mocks.base import DBusServiceMock from tests.dbus_service_mocks.logind import Logind as LogindService @@ -131,3 +133,97 @@ async def mock_shutdown(**kwargs): # shutdown() is called reentrantly - it awaits the in-progress shutdown async with asyncio.timeout(2): await shutdown_called.wait() + + +async def test_host_unload_cancels_monitor_task( + coresys: CoreSys, logind_service: LogindService +): + """Test unload cancels the shutdown monitor task.""" + await coresys.host.load() + await asyncio.sleep(0.1) + + assert coresys.host._shutdown_monitor_task is not None + assert not coresys.host._shutdown_monitor_task.done() + + await coresys.host.unload() + + assert coresys.host._shutdown_monitor_task is None + + +async def test_host_unload_no_monitor_task(coresys: CoreSys): + """Test unload when no monitor task was started.""" + # Don't call load(), so no monitor task exists + assert coresys.host._shutdown_monitor_task is None + await coresys.host.unload() + assert coresys.host._shutdown_monitor_task is None + + +async def test_monitor_inhibit_lock_failure( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test monitor task logs warning when inhibit lock fails.""" + with patch.object( + coresys.dbus.logind, "inhibit", side_effect=DBusError("test error") + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + assert "Could not take shutdown inhibitor lock from logind" in caplog.text + + +async def test_monitor_dbus_error_during_signal_wait( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test monitor task handles D-Bus errors during signal monitoring.""" + with patch.object( + coresys.dbus.logind, + "prepare_for_shutdown", + side_effect=DBusError("connection lost"), + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + assert "Error monitoring host shutdown signal" in caplog.text + + +async def test_inhibitor_lock_released_after_shutdown( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test that the inhibitor lock FD is closed after shutdown completes.""" + # Mock inhibit to return a real FD (session bus doesn't negotiate unix FDs) + r_fd, w_fd = os.pipe() + os.close(w_fd) + + with patch.object( + coresys.dbus.logind, "inhibit", new_callable=AsyncMock, return_value=r_fd + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + await coresys.core.set_state(CoreState.RUNNING) + + with patch.object(coresys.core, "shutdown", new_callable=AsyncMock): + logind_service.PrepareForShutdown() + await logind_service.ping() + await asyncio.sleep(0.2) + + assert "Shutdown inhibitor lock released" in caplog.text + + +async def test_no_monitor_task_without_logind(coresys: CoreSys): + """Test no monitor task is started when logind is not connected.""" + with patch.object( + type(coresys.dbus.logind), + "is_connected", + new_callable=PropertyMock, + return_value=False, + ): + await coresys.host.load() + + assert coresys.host._shutdown_monitor_task is None diff --git a/tests/test_core.py b/tests/test_core.py index a393fe87523..505574bf804 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,7 @@ """Testing handling with CoreState.""" # pylint: disable=W0212 +import asyncio import datetime import errno from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch @@ -146,3 +147,67 @@ async def test_write_state_failure( assert "Can't update the Supervisor state" in caplog.text assert coresys.core.state == CoreState.RUNNING + + +async def test_shutdown_reentrant_waits(coresys: CoreSys): + """Test that concurrent shutdown calls wait for the first to complete.""" + call_count = 0 + shutdown_started = asyncio.Event() + proceed = asyncio.Event() + + original_shutdown = coresys.addons.shutdown + + async def slow_addon_shutdown(startup): + nonlocal call_count + call_count += 1 + shutdown_started.set() + await proceed.wait() + return await original_shutdown(startup) + + await coresys.core.set_state(CoreState.RUNNING) + + with patch.object(coresys.addons, "shutdown", side_effect=slow_addon_shutdown): + # Start first shutdown + task1 = asyncio.create_task(coresys.core.shutdown()) + await shutdown_started.wait() + + # Second call should wait, not start a new shutdown + task2 = asyncio.create_task(coresys.core.shutdown()) + await asyncio.sleep(0.05) + + # Let the shutdown proceed + proceed.set() + + await asyncio.gather(task1, task2) + + # Addon shutdown was only called by the first shutdown (4 startup levels) + assert call_count == 4 + assert coresys.core._shutdown_event.is_set() + + +async def test_shutdown_event_reset_between_cycles(coresys: CoreSys): + """Test that shutdown event is reset for repeated shutdown cycles (e.g. backup restore).""" + await coresys.core.set_state(CoreState.FREEZE) + + # First shutdown cycle + await coresys.core.shutdown() + assert coresys.core._shutdown_event.is_set() + + # Simulate backup restore returning to RUNNING + await coresys.core.set_state(CoreState.RUNNING) + + # Second shutdown cycle should work (event was cleared) + second_entered = False + + original_shutdown = coresys.addons.shutdown + + async def track_addon_shutdown(startup): + nonlocal second_entered + second_entered = True + return await original_shutdown(startup) + + with patch.object(coresys.addons, "shutdown", side_effect=track_addon_shutdown): + await coresys.core.shutdown() + + assert second_entered + assert coresys.core._shutdown_event.is_set() From 58bb0c909d5d52a6d16632ac5b6905e56b50212b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 16 Mar 2026 14:22:25 +0100 Subject: [PATCH 7/9] Guard shutdown() for STOPPING/CLOSE and unload host early in stop() Move host.unload() before Stage 1 in stop() so the shutdown monitor task is cancelled before infrastructure teardown begins. This prevents a race where the monitor could react to PrepareForShutdown after stop() has already started tearing down. For the remaining edge case where shutdown() is called while stop() is running, log a warning and return immediately instead of awaiting the shutdown event (which would deadlock since stop() never sets it). Co-Authored-By: Claude Opus 4.6 (1M context) --- supervisor/core.py | 12 ++++++++++-- tests/test_core.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/supervisor/core.py b/supervisor/core.py index 0fd57985a47..4ce1bb08dbf 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -315,6 +315,9 @@ async def stop(self) -> None: # don't process scheduler anymore await self.set_state(CoreState.STOPPING) + # Cancel shutdown monitor task before tearing down infrastructure + await self.sys_host.unload() + # Stage 1 try: async with asyncio.timeout(10): @@ -341,7 +344,6 @@ async def stop(self) -> None: self.sys_websession.close(), self.sys_ingress.unload(), self.sys_hardware.unload(), - self.sys_host.unload(), self.sys_dbus.unload(), ) ] @@ -359,7 +361,13 @@ async def shutdown(self, *, remove_homeassistant_container: bool = False) -> Non Reentrant: if a shutdown is already in progress, subsequent calls await completion of the existing shutdown rather than starting a second one. """ - if self.state in (CoreState.SHUTDOWN, CoreState.STOPPING, CoreState.CLOSE): + # Supervisor is already tearing down, no point running shutdown + if self.state in (CoreState.STOPPING, CoreState.CLOSE): + _LOGGER.warning("Ignoring shutdown request, Supervisor is already stopping") + return + + # Another shutdown is in progress, wait for it to complete + if self.state == CoreState.SHUTDOWN: await self._shutdown_event.wait() return diff --git a/tests/test_core.py b/tests/test_core.py index 505574bf804..be30243e888 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -211,3 +211,19 @@ async def track_addon_shutdown(startup): assert second_entered assert coresys.core._shutdown_event.is_set() + + +@pytest.mark.parametrize( + "state", [CoreState.STOPPING, CoreState.CLOSE], ids=["stopping", "close"] +) +async def test_shutdown_ignored_during_stop( + coresys: CoreSys, caplog: pytest.LogCaptureFixture, state: CoreState +): + """Test that shutdown is ignored when Supervisor is already stopping.""" + await coresys.core.set_state(state) + + with patch.object(coresys.addons, "shutdown") as mock_addon_shutdown: + await coresys.core.shutdown() + + mock_addon_shutdown.assert_not_called() + assert "Ignoring shutdown request, Supervisor is already stopping" in caplog.text From def618bccf5041bc3a59e08bb0062129c438cfca Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 16 Mar 2026 15:15:45 +0100 Subject: [PATCH 8/9] Wait for startup to complete before running shutdown If PrepareForShutdown fires while Supervisor is still starting up, wait for startup to complete before running the graceful shutdown sequence. This prevents shutting down partially initialized containers and services. The _startup_complete event is set automatically in set_state() whenever state transitions to RUNNING. If SIGTERM arrives during startup, core.stop() cancels the monitor task via host.unload(), cleanly interrupting the wait. Co-Authored-By: Claude Opus 4.6 --- supervisor/core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/supervisor/core.py b/supervisor/core.py index 4ce1bb08dbf..4ba8614555a 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -43,6 +43,7 @@ def __init__(self, coresys: CoreSys) -> None: self._state: CoreState = CoreState.INITIALIZE self.exit_code: int = 0 self._shutdown_event: asyncio.Event = asyncio.Event() + self._startup_complete: asyncio.Event = asyncio.Event() @property def state(self) -> CoreState: @@ -83,6 +84,9 @@ async def set_state(self, new_state: CoreState) -> None: self._state = new_state await self._write_run_state() + if self._state == CoreState.RUNNING: + self._startup_complete.set() + # Don't attempt to notify anyone on CLOSE as we're about to stop the event loop if self._state != CoreState.CLOSE: self.sys_bus.fire_event(BusEvent.SUPERVISOR_STATE_CHANGE, self._state) @@ -361,6 +365,12 @@ async def shutdown(self, *, remove_homeassistant_container: bool = False) -> Non Reentrant: if a shutdown is already in progress, subsequent calls await completion of the existing shutdown rather than starting a second one. """ + if self.state in STARTING_STATES: + _LOGGER.debug( + "Shutdown requested while Supervisor is still starting up, waiting for startup to complete" + ) + await self._startup_complete.wait() + # Supervisor is already tearing down, no point running shutdown if self.state in (CoreState.STOPPING, CoreState.CLOSE): _LOGGER.warning("Ignoring shutdown request, Supervisor is already stopping") From 1615b50e6b4cd45a4e6b3235b95a924895efd48e Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 16 Mar 2026 16:24:46 +0100 Subject: [PATCH 9/9] Fix pytests --- tests/os/test_data_disk.py | 1 + tests/resolution/fixup/test_system_adopt_data_disk.py | 4 ++++ tests/resolution/fixup/test_system_execute_reboot.py | 2 ++ 3 files changed, 7 insertions(+) diff --git a/tests/os/test_data_disk.py b/tests/os/test_data_disk.py index 63b25e520cc..2b31e1654f6 100644 --- a/tests/os/test_data_disk.py +++ b/tests/os/test_data_disk.py @@ -271,6 +271,7 @@ async def test_datadisk_migrate_between_external_renames( all_dbus_services["os_agent"].emit_properties_changed({"Version": "1.5.0"}) await all_dbus_services["os_agent"].ping() + await coresys.core.set_state(CoreState.RUNNING) await coresys.os.datadisk.migrate_disk("Generic-Flash-Disk-61BCDDB6") assert datadisk_service.MarkDataMove.calls == [()] diff --git a/tests/resolution/fixup/test_system_adopt_data_disk.py b/tests/resolution/fixup/test_system_adopt_data_disk.py index 1366cebcb51..cbb24399d0e 100644 --- a/tests/resolution/fixup/test_system_adopt_data_disk.py +++ b/tests/resolution/fixup/test_system_adopt_data_disk.py @@ -5,6 +5,7 @@ from dbus_fast import DBusError, ErrorType, Variant import pytest +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.resolution.const import ContextType, IssueType, SuggestionType from supervisor.resolution.data import Issue, Suggestion @@ -98,6 +99,7 @@ async def test_fixup( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ @@ -176,6 +178,7 @@ async def test_fixup_reboot_failed( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ @@ -228,6 +231,7 @@ async def test_fixup_disabled_data_disk( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ diff --git a/tests/resolution/fixup/test_system_execute_reboot.py b/tests/resolution/fixup/test_system_execute_reboot.py index 084cfcde75d..f14e4641fca 100644 --- a/tests/resolution/fixup/test_system_execute_reboot.py +++ b/tests/resolution/fixup/test_system_execute_reboot.py @@ -1,5 +1,6 @@ """Test fixup system reboot.""" +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.resolution.const import ContextType, IssueType, SuggestionType from supervisor.resolution.data import Issue, Suggestion @@ -25,6 +26,7 @@ async def test_fixup( ) coresys.resolution.add_issue(Issue(IssueType.REBOOT_REQUIRED, ContextType.SYSTEM)) + await coresys.core.set_state(CoreState.RUNNING) await system_execute_reboot() assert logind_service.Reboot.calls == [(False,)]