Skip to content

Commit ad1a911

Browse files
agners and claude authored
Improve and extend frontend probe after update with WebSocket check (#6811)
* Improve and extend frontend probe after update with WebSocket check

The post-update health check introduced in #6311 added HomeAssistantAPI.check_frontend_available, which fetched the frontend through the existing Supervisor-internal API connection to Core. Since #6742 that connection optionally runs over a Unix socket with no authentication, so the request no longer exercises the same transport, auth and routing path that an external HTTP client uses.

Move the frontend probe out of HomeAssistantAPI into a small frontend_check module that talks to Core's TCP endpoints via the plain websession with no authentication, mirroring what an external client would see.

While doing this, extend the post-update verification to also probe the WebSocket endpoint: open /api/websocket and confirm the first frame is the auth_required text message. This catches the kind of WebSocket breakage seen in #6802, where api/config still listed websocket_api as loaded and GET / still returned HTML, but the WebSocket handshake completed with an immediate close frame and the frontend was unusable.

The component check now also requires "http" to be loaded, in addition to "frontend" and "websocket_api", and iterates so every missing component is logged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Address review feedback on WebSocket probe

- Wrap ws_connect in asyncio.wait_for so the handshake has an explicit bounded timeout (the global websession's default timeout would otherwise apply).
- Validate that the auth_required payload is a JSON object before calling .get("type"); a list/string would otherwise raise AttributeError at runtime.
- Add a regression test covering a non-dict JSON payload.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 179c7f0 commit ad1a911

6 files changed

Lines changed: 303 additions & 94 deletions

File tree

supervisor/homeassistant/api.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -368,32 +368,3 @@ async def check_api_state(self) -> bool:
368368
if state := await self.get_api_state():
369369
return state.core_state == "RUNNING" or state.offline_db_migration
370370
return False
371-
372-
async def check_frontend_available(self) -> bool:
373-
"""Check if the frontend is accessible by fetching the root path.
374-
375-
Caller should make sure that Home Assistant Core is running before
376-
calling this method.
377-
378-
Returns:
379-
True if the frontend responds successfully, False otherwise.
380-
381-
"""
382-
try:
383-
async with self.make_request("get", "", timeout=30) as resp:
384-
# Frontend should return HTML content
385-
if resp.status == 200:
386-
content_type = resp.headers.get(hdrs.CONTENT_TYPE, "")
387-
if "text/html" in content_type:
388-
_LOGGER.debug("Frontend is accessible and serving HTML")
389-
return True
390-
_LOGGER.warning(
391-
"Frontend responded but with unexpected content type: %s",
392-
content_type,
393-
)
394-
return False
395-
_LOGGER.warning("Frontend returned status %s", resp.status)
396-
return False
397-
except HomeAssistantAPIError as err:
398-
_LOGGER.debug("Cannot reach frontend: %s", err)
399-
return False

supervisor/homeassistant/core.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
WATCHDOG_THROTTLE_MAX_CALLS,
4646
WATCHDOG_THROTTLE_PERIOD,
4747
)
48+
from .frontend_check import verify_frontend
4849

4950
_LOGGER: logging.Logger = logging.getLogger(__name__)
5051

@@ -333,17 +334,19 @@ async def _update(to_version: AwesomeVersion) -> None:
333334
# The API stopped responding between the update and now
334335
self._error_state = True
335336
else:
336-
# Verify that the frontend is loaded
337-
if "frontend" not in data.get("components", []):
338-
_LOGGER.error("API responds but frontend is not loaded")
337+
components = data.get("components", [])
338+
# Verify that the integrations needed to serve the frontend
339+
# are loaded
340+
for required in ("http", "frontend", "websocket_api"):
341+
if required not in components:
342+
_LOGGER.error("API responds but %s is not loaded", required)
343+
self._error_state = True
344+
# Probe the public HTTP/WS endpoints as an external client
345+
# would, to catch cases where integrations are listed as
346+
# loaded but the endpoints don't actually function.
347+
if not self._error_state and not await verify_frontend(self.coresys):
339348
self._error_state = True
340-
# Check that the frontend is actually accessible
341-
elif not await self.sys_homeassistant.api.check_frontend_available():
342-
_LOGGER.error(
343-
"Frontend component loaded but frontend is not accessible"
344-
)
345-
self._error_state = True
346-
else:
349+
if not self._error_state:
347350
# Health checks passed, clean up old image
348351
with suppress(DockerError):
349352
await self.instance.cleanup(old_image=old_image)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""External frontend availability probes for Home Assistant Core.
2+
3+
These probes intentionally bypass the Supervisor-internal API layer
4+
(authentication, Unix socket transport, retries) so they exercise the
5+
same code paths an external HTTP/WebSocket client would. The goal is
6+
to detect cases where Core's HTTP API responds but the user-facing
7+
frontend or WebSocket endpoint is broken (e.g. due to a custom http
8+
component override masking websocket_api).
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import asyncio
14+
from contextlib import suppress
15+
import logging
16+
17+
import aiohttp
18+
from aiohttp import hdrs
19+
20+
from ..coresys import CoreSys
21+
22+
_LOGGER: logging.Logger = logging.getLogger(__name__)
23+
24+
_PROBE_TIMEOUT = aiohttp.ClientTimeout(total=30)
25+
_WS_HANDSHAKE_TIMEOUT = 30.0
26+
_WS_RECEIVE_TIMEOUT = 10.0
27+
28+
29+
async def check_frontend(coresys: CoreSys) -> bool:
    """Check that the root path of Core answers with an HTML page.

    The request goes through the plain ``coresys.websession`` against
    ``<api_url>/`` with no extra headers, bounded by ``_PROBE_TIMEOUT``
    and with ``ssl=False``.

    Returns:
        True when the response has status 200 and a Content-Type
        containing ``text/html``. False otherwise; every failure,
        including an ``aiohttp.ClientError`` or ``TimeoutError`` raised
        by the request, is logged at error level.

    """
    url = f"{coresys.homeassistant.api_url}/"
    try:
        async with coresys.websession.get(
            url, timeout=_PROBE_TIMEOUT, ssl=False
        ) as response:
            if response.status == 200:
                # A 200 alone is not enough: the body must be declared HTML.
                mime_type = response.headers.get(hdrs.CONTENT_TYPE, "")
                if "text/html" not in mime_type:
                    _LOGGER.error(
                        "Frontend responded with unexpected content type: %s",
                        mime_type,
                    )
                    return False
            else:
                _LOGGER.error("Frontend returned status %s", response.status)
                return False
    except (aiohttp.ClientError, TimeoutError) as err:
        _LOGGER.error("Cannot reach frontend at %s: %s", url, err)
        return False
    else:
        _LOGGER.debug("Frontend is accessible and serving HTML")
        return True
52+
53+
54+
async def check_websocket(coresys: CoreSys) -> bool:
    """Check that the WebSocket endpoint greets a new client correctly.

    No authentication is attempted. The probe connects to
    ``coresys.homeassistant.ws_url`` (handshake bounded by
    ``_WS_HANDSHAKE_TIMEOUT``), waits up to ``_WS_RECEIVE_TIMEOUT`` for
    the first frame and requires it to be a text frame whose JSON
    payload is an object with ``"type": "auth_required"``.

    Returns:
        True when that first frame matches. False otherwise; every
        failure, including ``aiohttp.ClientError``, ``TimeoutError`` or
        a ``ValueError`` from JSON decoding, is logged at error level.

    """
    url = coresys.homeassistant.ws_url
    connection: aiohttp.ClientWebSocketResponse | None = None
    try:
        connection = await asyncio.wait_for(
            coresys.websession.ws_connect(url, ssl=False),
            timeout=_WS_HANDSHAKE_TIMEOUT,
        )
        first_frame = await connection.receive(timeout=_WS_RECEIVE_TIMEOUT)
        if first_frame.type != aiohttp.WSMsgType.TEXT:
            _LOGGER.error(
                "WebSocket handshake returned non-text message: %s",
                first_frame.type,
            )
            return False
        payload = first_frame.json()
        # Guard the .get() call: the payload could be a list or a string.
        greets_with_auth = (
            isinstance(payload, dict) and payload.get("type") == "auth_required"
        )
        if not greets_with_auth:
            _LOGGER.error("WebSocket did not send auth_required, got: %s", payload)
            return False
    except (aiohttp.ClientError, TimeoutError, ValueError) as err:
        _LOGGER.error("WebSocket probe to %s failed: %s", url, err)
        return False
    finally:
        # Best-effort cleanup; a failing close must not change the verdict.
        if connection is not None:
            with suppress(Exception):
                await connection.close()

    _LOGGER.debug("WebSocket endpoint accepted handshake")
    return True
86+
87+
88+
async def verify_frontend(coresys: CoreSys) -> bool:
    """Run the HTTP probe, then the WebSocket probe; both must pass.

    The WebSocket probe is skipped when the HTTP probe already failed.
    """
    frontend_ok = await check_frontend(coresys)
    if not frontend_ok:
        return frontend_ok
    return await check_websocket(coresys)

tests/api/test_homeassistant.py

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from supervisor.exceptions import HomeAssistantError
1818
from supervisor.homeassistant.api import APIState, HomeAssistantAPI
1919
from supervisor.homeassistant.const import WSEvent
20+
import supervisor.homeassistant.core as ha_core
2021
from supervisor.homeassistant.core import HomeAssistantCore
2122
from supervisor.homeassistant.module import HomeAssistant
2223
from supervisor.resolution.const import ContextType, IssueType
@@ -315,9 +316,11 @@ async def test_api_progress_updates_home_assistant_update(
315316
new=PropertyMock(return_value=AwesomeVersion("2025.8.0")),
316317
),
317318
patch.object(
318-
HomeAssistantAPI, "get_config", return_value={"components": ["frontend"]}
319+
HomeAssistantAPI,
320+
"get_config",
321+
return_value={"components": ["http", "frontend", "websocket_api"]},
319322
),
320-
patch.object(HomeAssistantAPI, "check_frontend_available", return_value=True),
323+
patch.object(ha_core, "verify_frontend", AsyncMock(return_value=True)),
321324
):
322325
resp = await api_client.post(f"{root}/update", json={"version": "2025.8.3"})
323326

@@ -492,9 +495,11 @@ async def test_update_frontend_check_success(
492495
new=PropertyMock(return_value=AwesomeVersion("2025.8.0")),
493496
),
494497
patch.object(
495-
HomeAssistantAPI, "get_config", return_value={"components": ["frontend"]}
498+
HomeAssistantAPI,
499+
"get_config",
500+
return_value={"components": ["http", "frontend", "websocket_api"]},
496501
),
497-
patch.object(HomeAssistantAPI, "check_frontend_available", return_value=True),
502+
patch.object(ha_core, "verify_frontend", AsyncMock(return_value=True)),
498503
patch.object(DockerInterface, "cleanup") as mock_cleanup,
499504
):
500505
resp = await api_client.post(f"{root}/update", json={"version": "2025.8.3"})
@@ -509,7 +514,7 @@ async def test_update_frontend_check_fails_triggers_rollback(
509514
caplog: pytest.LogCaptureFixture,
510515
tmp_supervisor_data: Path,
511516
):
512-
"""Test that update triggers rollback when frontend check fails."""
517+
"""Test that update triggers rollback when health probes fail."""
513518
api_client, root = core_api_client_with_root
514519
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
515520
coresys.homeassistant.version = AwesomeVersion("2025.8.0")
@@ -535,16 +540,17 @@ async def mock_update(*args, **kwargs):
535540
new=PropertyMock(return_value=AwesomeVersion("2025.8.0")),
536541
),
537542
patch.object(
538-
HomeAssistantAPI, "get_config", return_value={"components": ["frontend"]}
543+
HomeAssistantAPI,
544+
"get_config",
545+
return_value={"components": ["http", "frontend", "websocket_api"]},
539546
),
540-
patch.object(HomeAssistantAPI, "check_frontend_available", return_value=False),
547+
patch.object(ha_core, "verify_frontend", AsyncMock(return_value=False)),
541548
patch.object(DockerInterface, "cleanup") as mock_cleanup,
542549
):
543550
resp = await api_client.post(f"{root}/update", json={"version": "2025.8.3"})
544551

545552
# Update should trigger rollback, which succeeds and returns 200
546553
assert resp.status == 200
547-
assert "Frontend component loaded but frontend is not accessible" in caplog.text
548554
assert "HomeAssistant update failed -> rollback!" in caplog.text
549555
# Should have called update twice (once for update, once for rollback)
550556
assert update_call_count == 2
@@ -556,6 +562,57 @@ async def mock_update(*args, **kwargs):
556562
mock_cleanup.assert_not_called()
557563

558564

565+
async def test_update_websocket_api_missing_triggers_rollback(
    core_api_client_with_root: tuple[TestClient, str],
    coresys: CoreSys,
    caplog: pytest.LogCaptureFixture,
    tmp_supervisor_data: Path,
):
    """Test rollback when Core reports no websocket_api component after update."""
    api_client, root = core_api_client_with_root
    coresys.hardware.disk.get_disk_free_space = lambda x: 5000
    coresys.homeassistant.version = AwesomeVersion("2025.8.0")

    # Version Core reports after the n-th image update: the first call is
    # the update itself, the second one is the rollback.
    version_after_call = {
        1: AwesomeVersion("2025.8.3"),
        2: AwesomeVersion("2025.8.0"),
    }
    update_calls = 0

    async def fake_update(*args, **kwargs):
        nonlocal update_calls
        update_calls += 1
        if update_calls in version_after_call:
            coresys.homeassistant.version = version_after_call[update_calls]

    # "websocket_api" is deliberately absent from the reported components.
    reported_config = {"components": ["http", "frontend"]}

    with (
        patch.object(DockerInterface, "update", new=fake_update),
        patch.object(
            DockerHomeAssistant,
            "version",
            new=PropertyMock(return_value=AwesomeVersion("2025.8.0")),
        ),
        patch.object(HomeAssistantAPI, "get_config", return_value=reported_config),
        patch.object(
            ha_core, "verify_frontend", AsyncMock(return_value=True)
        ) as frontend_probe,
        patch.object(DockerInterface, "cleanup") as cleanup,
    ):
        resp = await api_client.post(f"{root}/update", json={"version": "2025.8.3"})

    # The rollback itself succeeds, so the API call still answers 200.
    assert resp.status == 200
    assert "API responds but websocket_api is not loaded" in caplog.text
    assert "HomeAssistant update failed -> rollback!" in caplog.text
    assert update_calls == 2
    # The endpoint probes are skipped once a required component is missing.
    frontend_probe.assert_not_called()
    rollback_issue = Issue(IssueType.UPDATE_ROLLBACK, ContextType.CORE)
    assert rollback_issue in coresys.resolution.issues
    cleanup.assert_not_called()
614+
615+
559616
async def test_update_get_config_error_triggers_rollback(
560617
core_api_client_with_root: tuple[TestClient, str],
561618
coresys: CoreSys,
@@ -586,16 +643,16 @@ async def mock_update(*args, **kwargs):
586643
),
587644
patch.object(HomeAssistantAPI, "get_config", side_effect=HomeAssistantError),
588645
patch.object(
589-
HomeAssistantAPI, "check_frontend_available", return_value=True
590-
) as mock_check_frontend,
646+
ha_core, "verify_frontend", AsyncMock(return_value=True)
647+
) as mock_frontend_check,
591648
patch.object(DockerInterface, "cleanup") as mock_cleanup,
592649
):
593650
resp = await api_client.post(f"{root}/update", json={"version": "2025.8.3"})
594651

595652
assert resp.status == 200
596653
assert "HomeAssistant update failed -> rollback!" in caplog.text
597654
assert update_call_count == 2
598-
mock_check_frontend.assert_not_called()
655+
mock_frontend_check.assert_not_called()
599656
assert (
600657
Issue(IssueType.UPDATE_ROLLBACK, ContextType.CORE) in coresys.resolution.issues
601658
)

tests/homeassistant/test_api.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from contextlib import asynccontextmanager
44
from unittest.mock import AsyncMock, MagicMock, patch
55

6-
from aiohttp import hdrs
76
from awesomeversion import AwesomeVersion
87
import pytest
98

@@ -17,49 +16,6 @@
1716

1817
from tests.common import MockResponse
1918

20-
# --- check_frontend_available ---
21-
22-
23-
@pytest.mark.parametrize(
24-
("status", "content_type", "expected"),
25-
[
26-
(200, "text/html; charset=utf-8", True),
27-
(404, "text/html", False),
28-
(200, "application/json", False),
29-
],
30-
)
31-
async def test_check_frontend_available(
32-
coresys: CoreSys, status: int, content_type: str, expected: bool
33-
):
34-
"""Test frontend availability based on HTTP status and content type."""
35-
mock_response = MagicMock()
36-
mock_response.status = status
37-
mock_response.headers = {hdrs.CONTENT_TYPE: content_type}
38-
39-
@asynccontextmanager
40-
async def mock_make_request(*args, **kwargs):
41-
yield mock_response
42-
43-
with patch.object(
44-
type(coresys.homeassistant.api), "make_request", new=mock_make_request
45-
):
46-
assert await coresys.homeassistant.api.check_frontend_available() is expected
47-
48-
49-
async def test_check_frontend_available_api_error(coresys: CoreSys):
50-
"""Test frontend availability check handles API errors gracefully."""
51-
52-
@asynccontextmanager
53-
async def mock_make_request(*args, **kwargs):
54-
raise HomeAssistantAPIError("Connection failed")
55-
yield # pragma: no cover
56-
57-
with patch.object(
58-
type(coresys.homeassistant.api), "make_request", new=mock_make_request
59-
):
60-
assert await coresys.homeassistant.api.check_frontend_available() is False
61-
62-
6319
# --- get_config / get_core_state ---
6420

6521

0 commit comments

Comments
 (0)