From 31cfb20eebc30ddcdc7a89cbb0e9d4c3aadb2b53 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 13 May 2026 14:13:51 +0200 Subject: [PATCH 01/16] feat(gateway): add GET /apps/{id}/belongs-to discovery endpoint Returns the area that contains an app, resolved transitively via the app's parent component and that component's area assignment. Follows the existing relationship-endpoint pattern (is-located-on, depends-on): collection response wrapped in {"items": [...]} with 0 or 1 element. The "belongs-to" resource was already declared in app capabilities; this wires it through with a real handler, route registration, capability URI on the app detail response, unit + integration tests, and REQ_INTEROP_106. closes #196 --- docs/api/rest.rst | 34 ++++++ docs/requirements/specs/discovery.rst | 10 ++ .../core/http/handlers/discovery_handlers.hpp | 6 + .../src/http/handlers/discovery_handlers.cpp | 84 ++++++++++++++ .../src/http/rest_server.cpp | 12 ++ .../test/test_discovery_handlers.cpp | 107 ++++++++++++++++++ .../test/test_entity_resource_model.cpp | 1 + .../test/features/test_hateoas.test.py | 38 +++++++ 8 files changed, 292 insertions(+) diff --git a/docs/api/rest.rst b/docs/api/rest.rst index f2b1bcfb..154e5c27 100644 --- a/docs/api/rest.rst +++ b/docs/api/rest.rst @@ -277,6 +277,40 @@ Apps Unknown apps return ``404 App not found`` with ``parameters.app_id``. +``GET /api/v1/apps/{app_id}/belongs-to`` + Return the area that contains this app via its parent component. + + The response follows the standard ``items`` wrapper and returns: + + - ``0`` items when the app has no associated host component + - ``0`` items when the parent component has no assigned area + - ``1`` item when the area is resolved + - ``1`` item with ``x-medkit.missing=true`` when the parent component references + an area that cannot currently be resolved + + **Example Response:** + + .. code-block:: json + + { + "items": [ + { + "id": "engine", + "name": "Engine", + "href": "/api/v1/areas/engine" + } + ], + "x-medkit": { + "total_count": 1 + }, + "_links": { + "self": "/api/v1/apps/engine-temp-sensor/belongs-to", + "app": "/api/v1/apps/engine-temp-sensor" + } + } + + Unknown apps return ``404 App not found`` with ``parameters.app_id``. + Functions ~~~~~~~~~ diff --git a/docs/requirements/specs/discovery.rst b/docs/requirements/specs/discovery.rst index 901c838b..24ec2f25 100644 --- a/docs/requirements/specs/discovery.rst +++ b/docs/requirements/specs/discovery.rst @@ -84,3 +84,13 @@ Discovery :tags: Discovery The endpoint shall return the component that hosts the addressed application. + +.. req:: GET /apps/{id}/belongs-to + :id: REQ_INTEROP_106 + :status: verified + :tags: Discovery + + The endpoint shall return the area that contains the addressed application, + resolved transitively via the app's parent component and that component's + area assignment. The response uses the standard ``items`` wrapper and + contains zero or one element. diff --git a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/discovery_handlers.hpp b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/discovery_handlers.hpp index 98566cc7..7f0fabf7 100644 --- a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/discovery_handlers.hpp +++ b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/discovery_handlers.hpp @@ -130,6 +130,12 @@ class DiscoveryHandlers { */ void handle_app_is_located_on(const httplib::Request & req, httplib::Response & res); + /** + * @brief Handle GET /apps/{app-id}/belongs-to - get parent area via component. + * @verifies REQ_INTEROP_106 + */ + void handle_app_belongs_to(const httplib::Request & req, httplib::Response & res); + // ========================================================================= // Function endpoints // ========================================================================= diff --git a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp index 1e391c01..c82145ad 100644 --- a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp @@ -972,6 +972,7 @@ void DiscoveryHandlers::handle_get_app(const httplib::Request & req, httplib::Re if (!app.component_id.empty()) { response["is-located-on"] = "/api/v1/components/" + app.component_id; + response["belongs-to"] = base_uri + "/belongs-to"; } if (!app.depends_on.empty()) { @@ -1098,6 +1099,89 @@ void DiscoveryHandlers::handle_app_depends_on(const httplib::Request & req, http } } +void DiscoveryHandlers::handle_app_belongs_to(const httplib::Request & req, httplib::Response & res) { + try { + if (req.matches.size() < 2) { + HandlerContext::send_error(res, 400, ERR_INVALID_REQUEST, "Invalid request"); + return; + } + + std::string app_id = req.matches[1]; + + auto validation_result = ctx_.validate_entity_id(app_id); + if (!validation_result) { + HandlerContext::send_error(res, 400, ERR_INVALID_PARAMETER, "Invalid app ID", + {{"details", validation_result.error()}, {"app_id", app_id}}); + return; + } + + // Cache-first lookup: EntityCache has merged entities from peers + const auto & cache = ctx_.node()->get_thread_safe_cache(); + auto app_opt = cache.get_app(app_id); + if (!app_opt) { + auto discovery = ctx_.node()->get_discovery_manager(); + app_opt = discovery->get_app(app_id); + } + + if (!app_opt) { + HandlerContext::send_error(res, 404, ERR_ENTITY_NOT_FOUND, "App not found", {{"app_id", app_id}}); + return; + } + + json items = json::array(); + const auto & app = *app_opt; + + if (!app.component_id.empty()) { + auto component_opt = cache.get_component(app.component_id); + if (!component_opt) { + auto discovery = ctx_.node()->get_discovery_manager(); + component_opt = discovery->get_component(app.component_id); + } + + if (component_opt && !component_opt->area.empty()) { + const auto & area_id = component_opt->area; + json item; + item["id"] = area_id; + item["href"] = "/api/v1/areas/" + area_id; + + auto area_opt = cache.get_area(area_id); + if (!area_opt) { + auto discovery = ctx_.node()->get_discovery_manager(); + area_opt = discovery->get_area(area_id); + } + if (area_opt) { + item["name"] = area_opt->name.empty() ? area_id : area_opt->name; + } else { + item["name"] = area_id; + XMedkit ext; + ext.add("missing", true); + item["x-medkit"] = ext.build(); + RCLCPP_WARN(HandlerContext::logger(), "App '%s' belongs to unknown area '%s' (via component '%s')", + app_id.c_str(), area_id.c_str(), app.component_id.c_str()); + } + items.push_back(item); + } + } + + json response; + response["items"] = items; + + XMedkit resp_ext; + resp_ext.add("total_count", items.size()); + response["x-medkit"] = resp_ext.build(); + + json links; + links["self"] = "/api/v1/apps/" + app_id + "/belongs-to"; + links["app"] = "/api/v1/apps/" + app_id; + response["_links"] = links; + + HandlerContext::send_json(res, response); + } catch (const std::exception & e) { + HandlerContext::send_error(res, 500, ERR_INTERNAL_ERROR, "Internal server error", {{"details", e.what()}}); + RCLCPP_ERROR(HandlerContext::logger(), "Error in handle_app_belongs_to: %s", e.what()); + } +} + void DiscoveryHandlers::handle_app_is_located_on(const httplib::Request & req, httplib::Response & res) { try { if (req.matches.size() < 2) { diff --git a/src/ros2_medkit_gateway/src/http/rest_server.cpp b/src/ros2_medkit_gateway/src/http/rest_server.cpp index b214bc8f..709cf6c1 100644 --- a/src/ros2_medkit_gateway/src/http/rest_server.cpp +++ b/src/ros2_medkit_gateway/src/http/rest_server.cpp @@ -1131,6 +1131,18 @@ void RESTServer::setup_routes() { .response(200, "Host component(s)", SB::ref("EntityList")) .operation_id("getAppHost"); + reg.get(entity_path + "/belongs-to", + [this](auto & req, auto & res) { + discovery_handlers_->handle_app_belongs_to(req, res); + }) + .tag("Discovery") + .summary("Get app parent area") + .description( + "Returns the area this app belongs to via its parent component, as a 0-or-1 element " + "collection.") + .response(200, "Parent area", SB::ref("EntityList")) + .operation_id("getAppArea"); + reg.get(entity_path + "/depends-on", [this](auto & req, auto & res) { discovery_handlers_->handle_app_depends_on(req, res); diff --git a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp index bbc5a438..a68a11b9 100644 --- a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp @@ -599,6 +599,7 @@ TEST_F(DiscoveryHandlersFixtureTest, GetAppReturnsLinksAndCapabilities) { auto body = parse_json(res); EXPECT_EQ(body["is-located-on"], "/api/v1/components/lidar_unit"); + EXPECT_EQ(body["belongs-to"], "/api/v1/apps/mapper/belongs-to"); EXPECT_EQ(body["depends-on"], "/api/v1/apps/mapper/depends-on"); EXPECT_EQ(body["_links"]["self"], "/api/v1/apps/mapper"); EXPECT_EQ(body["_links"]["is-located-on"], "/api/v1/components/lidar_unit"); @@ -689,6 +690,112 @@ TEST_F(DiscoveryHandlersFixtureTest, AppIsLocatedOnUnknownAppReturns404) { EXPECT_EQ(res.status, 404); } +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsParentArea) { + auto req = make_request_with_match("/api/v1/apps/mapper/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + auto body = parse_json(res); + ASSERT_EQ(body["items"].size(), 1); + EXPECT_EQ(body["items"][0]["id"], "sensors"); + EXPECT_EQ(body["items"][0]["name"], "Sensors"); + EXPECT_EQ(body["items"][0]["href"], "/api/v1/areas/sensors"); + EXPECT_EQ(body["x-medkit"]["total_count"], 1); + EXPECT_EQ(body["_links"]["self"], "/api/v1/apps/mapper/belongs-to"); + EXPECT_EQ(body["_links"]["app"], "/api/v1/apps/mapper"); +} + +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsEmptyWhenAppHasNoComponent) { + auto req = make_request_with_match("/api/v1/apps/standalone/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + auto body = parse_json(res); + ASSERT_TRUE(body["items"].empty()); + EXPECT_EQ(body["x-medkit"]["total_count"], 0); + EXPECT_EQ(body["_links"]["self"], "/api/v1/apps/standalone/belongs-to"); + EXPECT_EQ(body["_links"]["app"], "/api/v1/apps/standalone"); +} + +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsEmptyWhenComponentHasNoArea) { + // Strip the parent component's area assignment so the chain App->Component->Area breaks. + auto & cache = const_cast(suite_node_->get_thread_safe_cache()); + auto components = cache.get_components(); + bool updated = false; + for (auto & comp : components) { + if (comp.id == "lidar_unit") { + comp.area.clear(); + updated = true; + break; + } + } + ASSERT_TRUE(updated); + cache.update_components(components); + + auto req = make_request_with_match("/api/v1/apps/mapper/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + auto body = parse_json(res); + ASSERT_TRUE(body["items"].empty()); + EXPECT_EQ(body["x-medkit"]["total_count"], 0); +} + +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsMissingItemWhenAreaUnresolved) { + // Reassign the parent component to an area id that does not exist in the catalogue. + auto & cache = const_cast(suite_node_->get_thread_safe_cache()); + auto components = cache.get_components(); + bool updated = false; + for (auto & comp : components) { + if (comp.id == "lidar_unit") { + comp.area = "ghost_area"; + updated = true; + break; + } + } + ASSERT_TRUE(updated); + cache.update_components(components); + + auto req = make_request_with_match("/api/v1/apps/mapper/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + auto body = parse_json(res); + ASSERT_EQ(body["items"].size(), 1); + EXPECT_EQ(body["items"][0]["id"], "ghost_area"); + EXPECT_EQ(body["items"][0]["name"], "ghost_area"); + EXPECT_EQ(body["items"][0]["href"], "/api/v1/areas/ghost_area"); + EXPECT_EQ(body["items"][0]["x-medkit"]["missing"], true); +} + +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersValidationTest, AppBelongsToInvalidIdReturns400) { + auto req = make_request_with_match("/api/v1/apps/bad/id/belongs-to", R"(/api/v1/apps/(.+)/belongs-to)"); + httplib::Response res; + + handlers_.handle_app_belongs_to(req, res); + + EXPECT_EQ(res.status, 400); +} + +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToUnknownAppReturns404) { + auto req = make_request_with_match("/api/v1/apps/unknown/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + EXPECT_EQ(res.status, 404); +} + // @verifies REQ_INTEROP_009 TEST_F(DiscoveryHandlersFixtureTest, AppDependsOnReturnsResolvedAndMissingDependencies) { auto req = make_request_with_match("/api/v1/apps/mapper/depends-on", R"(/api/v1/apps/([^/]+)/depends-on)"); diff --git a/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp b/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp index 1f31b13a..6791d5af 100644 --- a/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp +++ b/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp @@ -171,6 +171,7 @@ TEST(EntityCapabilities, ComponentSupportsOperations) { TEST(EntityCapabilities, AppSupportsIsLocatedOn) { auto caps = EntityCapabilities::for_type(SovdEntityType::APP); EXPECT_TRUE(caps.supports_resource("is-located-on")); + EXPECT_TRUE(caps.supports_resource("belongs-to")); EXPECT_TRUE(caps.supports_collection(ResourceCollection::OPERATIONS)); EXPECT_FALSE(caps.supports_resource("hosts")); // Apps don't host anything } diff --git a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py index acbec83a..e758d9b9 100644 --- a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py +++ b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py @@ -461,6 +461,44 @@ def test_is_located_on_apps_nonexistent(self): self.assertIn('parameters', data) self.assertEqual(data['parameters'].get('app_id'), 'nonexistent_app') + def test_belongs_to_apps_returns_parent_area(self): + """GET /apps/{id}/belongs-to returns the parent area via component.""" + # @verifies REQ_INTEROP_106 + response = requests.get( + f'{self.BASE_URL}/apps/engine-temp-sensor/belongs-to', + timeout=10 + ) + self.assertEqual(response.status_code, 200) + + data = response.json() + self.assertIn('items', data) + self.assertIn('_links', data) + self.assertEqual(data['_links']['self'], '/api/v1/apps/engine-temp-sensor/belongs-to') + self.assertEqual(data['_links']['app'], '/api/v1/apps/engine-temp-sensor') + self.assertLessEqual(len(data['items']), 1) + + # engine-temp-sensor -> temp-sensor-hw (component) -> engine (area) + self.assertEqual(len(data['items']), 1) + area = data['items'][0] + self.assertEqual(area['id'], 'engine') + self.assertIn('name', area) + self.assertEqual(area['href'], '/api/v1/areas/engine') + + def test_belongs_to_apps_nonexistent(self): + """GET /apps/{id}/belongs-to returns 404 for unknown app.""" + # @verifies REQ_INTEROP_106 + response = requests.get( + f'{self.BASE_URL}/apps/nonexistent_app/belongs-to', + timeout=10 + ) + self.assertEqual(response.status_code, 404) + + data = response.json() + self.assertIn('error_code', data) + self.assertEqual(data['message'], 'App not found') + self.assertIn('parameters', data) + self.assertEqual(data['parameters'].get('app_id'), 'nonexistent_app') + # ------------------------------------------------------------------ # Functions (test_81) # ------------------------------------------------------------------ From d989583bfe1c63ae5c5f112d0ed501f5fca4761c Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 13 May 2026 16:35:04 +0200 Subject: [PATCH 02/16] test(gateway): poll for trigger expiry, split app capability test Replace the fixed sleep in TriggerManagerTest.LifetimeExpiry with a bounded polling loop. The test now exits as soon as is_active flips to false on a fast machine (~1 s) and only sits up to the 5 s deadline if the scheduler stalls under heavy parallel test load. Also split AppSupportsIsLocatedOn into two cases so the name reflects what each test asserts: AppSupportsIsLocatedOn and AppSupportsBelongsTo. --- .../test/test_entity_resource_model.cpp | 6 +++++- .../test/test_trigger_manager.cpp | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp b/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp index 6791d5af..73bb6754 100644 --- a/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp +++ b/src/ros2_medkit_gateway/test/test_entity_resource_model.cpp @@ -171,11 +171,15 @@ TEST(EntityCapabilities, ComponentSupportsOperations) { TEST(EntityCapabilities, AppSupportsIsLocatedOn) { auto caps = EntityCapabilities::for_type(SovdEntityType::APP); EXPECT_TRUE(caps.supports_resource("is-located-on")); - EXPECT_TRUE(caps.supports_resource("belongs-to")); EXPECT_TRUE(caps.supports_collection(ResourceCollection::OPERATIONS)); EXPECT_FALSE(caps.supports_resource("hosts")); // Apps don't host anything } +TEST(EntityCapabilities, AppSupportsBelongsTo) { + auto caps = EntityCapabilities::for_type(SovdEntityType::APP); + EXPECT_TRUE(caps.supports_resource("belongs-to")); +} + TEST(EntityCapabilities, FunctionAggregatesCollections) { auto caps = EntityCapabilities::for_type(SovdEntityType::FUNCTION); // ros2_medkit extension: functions support additional collections via aggregation diff --git a/src/ros2_medkit_gateway/test/test_trigger_manager.cpp b/src/ros2_medkit_gateway/test/test_trigger_manager.cpp index 37ef6bbe..104bd661 100644 --- a/src/ros2_medkit_gateway/test/test_trigger_manager.cpp +++ b/src/ros2_medkit_gateway/test/test_trigger_manager.cpp @@ -316,8 +316,16 @@ TEST_F(TriggerManagerTest, LifetimeExpiry) { EXPECT_TRUE(manager_->is_active(created->id)); - // Sleep past expiry with margin for CPU contention during parallel testing - std::this_thread::sleep_for(std::chrono::milliseconds(1500)); + // Poll for expiry: finishes quickly on a fast machine but tolerates + // scheduler stalls under parallel test load. Deadline is well past the + // 1 s lifetime so a busy CI runner does not race the check. + const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(5000); + while (std::chrono::steady_clock::now() < deadline) { + if (!manager_->is_active(created->id)) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } EXPECT_FALSE(manager_->is_active(created->id)); } From b77f7a5124fea1b7d3bc3c3cbdb4790ab11b8c8b Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Wed, 13 May 2026 22:07:38 +0200 Subject: [PATCH 03/16] fix(gateway): expose belongs-to in app _links for HATEOAS discoverability handle_get_app already added the top-level "belongs-to" URI when the app has a parent component but did not also add it to the _links object. That left the new relationship discoverable from the top-level field but not from _links, while is-located-on and depends-on appear in both. Mirror the existing pattern: gate the _links entry on the same component_id condition and assert it in the unit test. --- src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp | 1 + src/ros2_medkit_gateway/test/test_discovery_handlers.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp index c82145ad..01c52516 100644 --- a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp @@ -995,6 +995,7 @@ void DiscoveryHandlers::handle_get_app(const httplib::Request & req, httplib::Re links.self("/api/v1/apps/" + app.id).collection("/api/v1/apps"); if (!app.component_id.empty()) { links.add("is-located-on", "/api/v1/components/" + app.component_id); + links.add("belongs-to", base_uri + "/belongs-to"); } response["_links"] = links.build(); diff --git a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp index a68a11b9..ceb6f421 100644 --- a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp @@ -603,6 +603,7 @@ TEST_F(DiscoveryHandlersFixtureTest, GetAppReturnsLinksAndCapabilities) { EXPECT_EQ(body["depends-on"], "/api/v1/apps/mapper/depends-on"); EXPECT_EQ(body["_links"]["self"], "/api/v1/apps/mapper"); EXPECT_EQ(body["_links"]["is-located-on"], "/api/v1/components/lidar_unit"); + EXPECT_EQ(body["_links"]["belongs-to"], "/api/v1/apps/mapper/belongs-to"); EXPECT_EQ(body["_links"]["depends-on"][0], "/api/v1/apps/planner"); EXPECT_EQ(body["_links"]["depends-on"][1], "/api/v1/apps/ghost_app"); EXPECT_EQ(body["x-medkit"]["source"], "manifest"); From e5db563c0b528d760185ee9c48fd06412f3d0992 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 09:42:14 +0200 Subject: [PATCH 04/16] test(gateway): join spin thread before reading get_*() results FaultManagerTest cases that called start_spinning() and then invoked fault_manager.get_snapshots() or get_rosbag() did not join the spin thread before letting test-scope locals run their destructors. Under TSan this produced flaky "data race in operator delete" warnings where the executor thread cleaning up the rclcpp client's pending request overlapped with destruction of the result and local fixtures in the main thread. Add stop_spinning() right after each get_*() call so the spin thread is cancelled and joined before the test reads from result or anything goes out of scope. Mirrors the pattern already used by the notify and trigger-event tests in this file. --- src/ros2_medkit_gateway/test/test_fault_manager.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ros2_medkit_gateway/test/test_fault_manager.cpp b/src/ros2_medkit_gateway/test/test_fault_manager.cpp index de81aaaf..99569ae1 100644 --- a/src/ros2_medkit_gateway/test/test_fault_manager.cpp +++ b/src/ros2_medkit_gateway/test/test_fault_manager.cpp @@ -158,6 +158,7 @@ TEST_F(FaultManagerTest, GetSnapshotsSuccessWithValidJson) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("MOTOR_OVERHEAT"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_TRUE(result.error_message.empty()); @@ -191,6 +192,7 @@ TEST_F(FaultManagerTest, GetSnapshotsUsesConfiguredFaultManagerNamespace) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("NAMESPACED_FAULT"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_EQ(result.data["fault_code"], "NAMESPACED_FAULT"); @@ -220,6 +222,7 @@ TEST_F(FaultManagerTest, InvalidFaultManagerNamespaceFallsBackToRootServicePath) FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("INVALID_NAMESPACE_FAULT"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_EQ(result.data["service_path"], "/fault_manager/get_snapshots"); @@ -241,6 +244,7 @@ TEST_F(FaultManagerTest, GetSnapshotsSuccessWithTopicFilter) { FaultManager fault_manager(std::make_shared(node_.get())); fault_manager.get_snapshots("TEST_FAULT", "/specific_topic"); + stop_spinning(); EXPECT_EQ(received_topic, "/specific_topic"); } @@ -258,6 +262,7 @@ TEST_F(FaultManagerTest, GetSnapshotsErrorResponse) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("NONEXISTENT_FAULT"); + stop_spinning(); EXPECT_FALSE(result.success); EXPECT_EQ(result.error_message, "Fault not found"); @@ -276,6 +281,7 @@ TEST_F(FaultManagerTest, GetSnapshotsInvalidJsonFallback) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("TEST_FAULT"); + stop_spinning(); EXPECT_TRUE(result.success); // When JSON parsing fails, raw_data should be returned @@ -296,6 +302,7 @@ TEST_F(FaultManagerTest, GetSnapshotsEmptyResponse) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_snapshots("TEST_FAULT"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_TRUE(result.data.is_object()); @@ -334,6 +341,7 @@ TEST_F(FaultManagerTest, GetRosbagSuccess) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_rosbag("TEST_ROSBAG_FAULT"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_TRUE(result.data.contains("file_path")); @@ -367,6 +375,7 @@ TEST_F(FaultManagerTest, GetRosbagUsesConfiguredFaultManagerNamespace) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_rosbag("NAMESPACED_ROSBAG"); + stop_spinning(); EXPECT_TRUE(result.success); EXPECT_EQ(result.data["file_path"], "/tmp/NAMESPACED_ROSBAG"); @@ -580,6 +589,7 @@ TEST_F(FaultManagerTest, GetRosbagNotFound) { FaultManager fault_manager(std::make_shared(node_.get())); auto result = fault_manager.get_rosbag("NONEXISTENT_FAULT"); + stop_spinning(); EXPECT_FALSE(result.success); EXPECT_EQ(result.error_message, "No rosbag file available for fault"); From 3e35da3c424c62aaf89d203cd99ab408826743df Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 09:42:21 +0200 Subject: [PATCH 05/16] ci(coverage): install gpg required by codecov-action The "Upload coverage to Codecov" step runs in an ubuntu:noble container that does not include gpg. codecov/codecov-action@v5 runs a dependency check that requires gpg and exits with code 1 when it is missing, failing the coverage job on every push to main since the step is gated on github.ref == 'refs/heads/main'. Add gpg to the apt-get install line so the action can validate uploader signatures. --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca4c7ad4..9a34a9c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -280,7 +280,8 @@ jobs: - name: Install dependencies run: | apt-get update - apt-get install -y lcov ros-jazzy-test-msgs + # gpg is required by codecov/codecov-action@v5 dependency check + apt-get install -y lcov ros-jazzy-test-msgs gpg source /opt/ros/jazzy/setup.bash rosdep update rosdep install --from-paths src --ignore-src -r -y From 9dcce7acc4a9166e5dcaf8b52514a597f7dddde3 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 10:14:28 +0200 Subject: [PATCH 06/16] test(integration): widen gateway shutdown timeouts for sanitizer overhead The gateway shutdown sequence (mdns stop, REST server stop, transport teardown, plugin manager shutdown, member destructors) completes well under a second normally, but under TSan/ASan overhead on CI runners it can exceed the launch_ros defaults (5s sigterm_timeout / 5s sigkill_timeout). When that happens launch_testing escalates SIGINT -> SIGTERM -> SIGKILL and the TestShutdown.test_exit_codes assertion reports exit code -9 even though the gateway is mid-shutdown. The TSan run for PR #398 hit exactly this on test_scenario_thermal_protection: the gateway logged TriggerTopicSubscriber shutdown complete and other steps but was still cleaning up when the SIGKILL timer fired. Set sigterm_timeout=30 and sigkill_timeout=15 in create_gateway_node so every integration test that goes through this factory gets the longer windows. The bigger numbers do not slow down healthy runs since the process exits as soon as shutdown actually finishes. --- .../ros2_medkit_test_utils/launch_helpers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ros2_medkit_integration_tests/ros2_medkit_test_utils/launch_helpers.py b/src/ros2_medkit_integration_tests/ros2_medkit_test_utils/launch_helpers.py index 612afd8f..78c777da 100644 --- a/src/ros2_medkit_integration_tests/ros2_medkit_test_utils/launch_helpers.py +++ b/src/ros2_medkit_integration_tests/ros2_medkit_test_utils/launch_helpers.py @@ -106,6 +106,14 @@ def create_gateway_node(*, port=DEFAULT_PORT, extra_params=None, coverage=True): output='screen', parameters=[params], additional_env=get_coverage_env() if coverage else {}, + # Default SIGINT->SIGTERM escalation is 5s and SIGTERM->SIGKILL is 5s. + # Under TSan/ASan the gateway shutdown sequence (mdns stop, REST server + # stop, transport teardown, plugin shutdown) easily exceeds 5s on slower + # CI runners, causing launch_testing to escalate to SIGKILL and the + # TestShutdown.test_exit_codes check to report -9. The process still + # shuts down cleanly given enough time, so widen both windows. + sigterm_timeout='30', + sigkill_timeout='15', ) From f83d8cc01f2786e4e1ce93c2017f833bc7911f2b Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 10:53:51 +0200 Subject: [PATCH 07/16] fix(plugins): make destructors noexcept and guard rclcpp resource reset Three gateway plugins held rclcpp::Node or rclcpp::Subscription members and called shutdown() from a non-noexcept destructor. On Rolling rclcpp the destructor of those resources throws graph_listener::NodeNotFoundError when the rclcpp context has already been invalidated by rclcpp::shutdown() (which is exactly the situation during late teardown). An exception escaping a destructor invokes std::terminate(), which is what test_beacon_param TestShutdown.test_exit_codes saw as exit code -6 on the rolling job. Plugin_manager::shutdown_all() already catches exceptions from plugin->shutdown(), but that does not help when the plugin destructor itself runs the throw (either by calling shutdown() again or by member auto-destruction once the explicit reset() failed). Fix in three plugins (parameter_beacon, topic_beacon, graph_provider): 1. Mark the destructor noexcept and wrap the shutdown() call in try-catch so nothing escapes the destructor. 2. Wrap the rclcpp resource reset() inside shutdown() in try-catch so the first shutdown attempt (driven by plugin_manager) drops the resource even when rclcpp throws on tear-down. Lifted only the destruction sites; the rest of shutdown() and the normal happy path are unchanged. --- .../param_beacon_plugin.hpp | 2 +- .../src/param_beacon_plugin.cpp | 19 ++++++++++++++++--- .../topic_beacon_plugin.hpp | 2 +- .../src/topic_beacon_plugin.cpp | 18 +++++++++++++++--- .../graph_provider_plugin.hpp | 2 +- .../src/graph_provider_plugin.cpp | 19 ++++++++++++++++--- 6 files changed, 50 insertions(+), 12 deletions(-) diff --git a/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/include/ros2_medkit_param_beacon/param_beacon_plugin.hpp b/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/include/ros2_medkit_param_beacon/param_beacon_plugin.hpp index 8c85724e..97e1bd5c 100644 --- a/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/include/ros2_medkit_param_beacon/param_beacon_plugin.hpp +++ b/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/include/ros2_medkit_param_beacon/param_beacon_plugin.hpp @@ -45,7 +45,7 @@ class ParameterBeaconPlugin : public ros2_medkit_gateway::GatewayPlugin, public ros2_medkit_gateway::IntrospectionProvider { public: ParameterBeaconPlugin() = default; - ~ParameterBeaconPlugin() override; + ~ParameterBeaconPlugin() noexcept override; /// Constructor with injectable client factory (for testing). explicit ParameterBeaconPlugin(ros2_medkit_param_beacon::ParameterClientFactory factory) diff --git a/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/src/param_beacon_plugin.cpp b/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/src/param_beacon_plugin.cpp index 9a53e84d..93f2d87e 100644 --- a/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/src/param_beacon_plugin.cpp +++ b/src/ros2_medkit_discovery_plugins/ros2_medkit_param_beacon/src/param_beacon_plugin.cpp @@ -34,8 +34,14 @@ using ros2_medkit_gateway::PLUGIN_API_VERSION; using ros2_medkit_gateway::PluginContext; using ros2_medkit_gateway::SovdEntityType; -ParameterBeaconPlugin::~ParameterBeaconPlugin() { - shutdown(); +ParameterBeaconPlugin::~ParameterBeaconPlugin() noexcept { + // On Rolling, ~rclcpp::Node can throw graph_listener::NodeNotFoundError + // once rclcpp::shutdown() has invalidated the context. An exception + // escaping a destructor calls std::terminate(), so swallow it here. + try { + shutdown(); + } catch (...) { + } } std::string ParameterBeaconPlugin::name() const { @@ -151,7 +157,14 @@ void ParameterBeaconPlugin::shutdown() { backoff_counts_.clear(); skip_remaining_.clear(); } - param_node_.reset(); + // ~rclcpp::Node can throw graph_listener::NodeNotFoundError on Rolling + // when the context was already torn down by rclcpp::shutdown(). Swallow + // it so the plugin_manager shutdown sequence (and the plugin destructor + // that calls back into us) does not abort the process. + try { + param_node_.reset(); + } catch (...) { + } } std::vector ParameterBeaconPlugin::get_routes() { diff --git a/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/include/ros2_medkit_topic_beacon/topic_beacon_plugin.hpp b/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/include/ros2_medkit_topic_beacon/topic_beacon_plugin.hpp index 0be750cd..e5753100 100644 --- a/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/include/ros2_medkit_topic_beacon/topic_beacon_plugin.hpp +++ b/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/include/ros2_medkit_topic_beacon/topic_beacon_plugin.hpp @@ -81,7 +81,7 @@ class TopicBeaconPlugin : public ros2_medkit_gateway::GatewayPlugin, public ros2 void configure(const nlohmann::json & config) override; void set_context(ros2_medkit_gateway::PluginContext & context) override; void shutdown() override; - ~TopicBeaconPlugin() override; + ~TopicBeaconPlugin() noexcept override; std::vector get_routes() override; ros2_medkit_gateway::IntrospectionResult introspect(const ros2_medkit_gateway::IntrospectionInput & input) override; diff --git a/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/src/topic_beacon_plugin.cpp b/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/src/topic_beacon_plugin.cpp index e2ecdf31..aa8ce05a 100644 --- a/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/src/topic_beacon_plugin.cpp +++ b/src/ros2_medkit_discovery_plugins/ros2_medkit_topic_beacon/src/topic_beacon_plugin.cpp @@ -32,8 +32,14 @@ using ros2_medkit_gateway::PLUGIN_API_VERSION; using ros2_medkit_gateway::PluginContext; using ros2_medkit_gateway::SovdEntityType; -TopicBeaconPlugin::~TopicBeaconPlugin() { - shutdown(); +TopicBeaconPlugin::~TopicBeaconPlugin() noexcept { + // On Rolling, ~rclcpp::Subscription can throw graph_listener::NodeNotFoundError + // once rclcpp::shutdown() has invalidated the context. An exception + // escaping a destructor calls std::terminate(), so swallow it here. + try { + shutdown(); + } catch (...) { + } } std::string TopicBeaconPlugin::name() const { @@ -107,7 +113,13 @@ void TopicBeaconPlugin::shutdown() { if (shutdown_requested_.exchange(true)) { return; } - subscription_.reset(); + // ~rclcpp::Subscription can throw on Rolling when the rclcpp context + // was torn down before us; swallow so plugin_manager shutdown and the + // plugin destructor calling back into us do not abort the process. + try { + subscription_.reset(); + } catch (...) { + } } std::vector TopicBeaconPlugin::get_routes() { diff --git a/src/ros2_medkit_plugins/ros2_medkit_graph_provider/include/ros2_medkit_graph_provider/graph_provider_plugin.hpp b/src/ros2_medkit_plugins/ros2_medkit_graph_provider/include/ros2_medkit_graph_provider/graph_provider_plugin.hpp index 3921fbd2..56f6b517 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_graph_provider/include/ros2_medkit_graph_provider/graph_provider_plugin.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_graph_provider/include/ros2_medkit_graph_provider/graph_provider_plugin.hpp @@ -55,7 +55,7 @@ class GraphProviderPlugin : public GatewayPlugin, public IntrospectionProvider { }; GraphProviderPlugin() = default; - ~GraphProviderPlugin() override; + ~GraphProviderPlugin() noexcept override; std::string name() const override; void configure(const nlohmann::json & config) override; diff --git a/src/ros2_medkit_plugins/ros2_medkit_graph_provider/src/graph_provider_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_graph_provider/src/graph_provider_plugin.cpp index e93adbde..ad3cf0f4 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_graph_provider/src/graph_provider_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_graph_provider/src/graph_provider_plugin.cpp @@ -317,15 +317,28 @@ nlohmann::json build_graph_document_for_apps(const std::string & function_id, } // namespace -GraphProviderPlugin::~GraphProviderPlugin() { - shutdown(); +GraphProviderPlugin::~GraphProviderPlugin() noexcept { + // On Rolling, destroying rclcpp resources (Subscription, Node) after + // rclcpp::shutdown() has invalidated the context can throw + // graph_listener::NodeNotFoundError. An exception escaping a destructor + // calls std::terminate(), so swallow it here. + try { + shutdown(); + } catch (...) { + } } void GraphProviderPlugin::shutdown() { if (shutdown_requested_.exchange(true)) { return; } - diagnostics_sub_.reset(); + // ~rclcpp::Subscription can throw on Rolling when the rclcpp context + // was torn down before us; swallow so plugin_manager shutdown and the + // plugin destructor calling back into us do not abort the process. + try { + diagnostics_sub_.reset(); + } catch (...) { + } } std::string GraphProviderPlugin::name() const { From 4727fe08f02b244e8c571328b72c2a178b2146f7 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 17:25:21 +0200 Subject: [PATCH 08/16] fix(gateway): allow non-admin roles to query app relationship endpoints auth_config.cpp had no entries for apps/*/belongs-to or apps/*/is-located-on under any of viewer / operator / configurator. The apps/* wildcard in the catch-all entry is single-segment (auth_requirement_policy maps * to [^/]+), so it does not cover the relationship subresources. Only admin had a matching apps/** fallback, which meant every other role hit 403 on these endpoints. Add the two new entries for each role next to the existing apps/*/depends-on entry. is-located-on is pre-existing - this also fixes the same gap that shipped with REQ_INTEROP_105. --- src/ros2_medkit_gateway/src/core/auth/auth_config.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ros2_medkit_gateway/src/core/auth/auth_config.cpp b/src/ros2_medkit_gateway/src/core/auth/auth_config.cpp index 8f4c6598..7e5c47a4 100644 --- a/src/ros2_medkit_gateway/src/core/auth/auth_config.cpp +++ b/src/ros2_medkit_gateway/src/core/auth/auth_config.cpp @@ -47,6 +47,8 @@ const std::unordered_map> & AuthConfig "GET:/api/v1/components/*/hosts", "GET:/api/v1/components/*/depends-on", "GET:/api/v1/apps/*/depends-on", + "GET:/api/v1/apps/*/is-located-on", + "GET:/api/v1/apps/*/belongs-to", "GET:/api/v1/functions/*/hosts", // Data: all entity types "GET:/api/v1/components/*/data", @@ -186,6 +188,8 @@ const std::unordered_map> & AuthConfig "GET:/api/v1/components/*/hosts", "GET:/api/v1/components/*/depends-on", "GET:/api/v1/apps/*/depends-on", + "GET:/api/v1/apps/*/is-located-on", + "GET:/api/v1/apps/*/belongs-to", "GET:/api/v1/functions/*/hosts", // Data: all entity types (read) "GET:/api/v1/components/*/data", @@ -386,6 +390,8 @@ const std::unordered_map> & AuthConfig "GET:/api/v1/components/*/hosts", "GET:/api/v1/components/*/depends-on", "GET:/api/v1/apps/*/depends-on", + "GET:/api/v1/apps/*/is-located-on", + "GET:/api/v1/apps/*/belongs-to", "GET:/api/v1/functions/*/hosts", // Data: all entity types (read) "GET:/api/v1/components/*/data", From aaef97c531b7bccb0d1f315fcbe863cd3e6246c5 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 17:26:27 +0200 Subject: [PATCH 09/16] fix(gateway): surface broken parent component on apps/{id}/belongs-to handle_app_belongs_to silently returned items=[] when the app's component_id pointed to a component the discovery layer could not resolve. HATEOAS clients then could not tell 'app legitimately has no parent area' from 'manifest broken, parent component gone'. Add the same symmetric branch handle_app_is_located_on already has: emit a single item with x-medkit.missing=true and an x-medkit extension field carrying the dangling component id, plus a RCLCPP_WARN. A component that resolves but has no area_id assigned still produces items=[] - that is a valid manifest configuration, not a broken ref. Also tighten the chain traversal with an atomic ThreadSafeEntityCache snapshot so the three lookups (app, component, area) cannot observe mixed cache generations under a concurrent refresh, and switch the three app relationship handlers to validate_entity_for_route so remote apps in an aggregation cluster get forwarded instead of 404. Mirror unit test AppIsLocatedOnReturnsMissingItemWhenHostComponentUnresolved. --- .../src/http/handlers/discovery_handlers.cpp | 124 +++++++++++------- .../test/test_discovery_handlers.cpp | 33 +++++ 2 files changed, 108 insertions(+), 49 deletions(-) diff --git a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp index 01c52516..d884c2db 100644 --- a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp @@ -982,6 +982,16 @@ void DiscoveryHandlers::handle_get_app(const httplib::Request & req, httplib::Re using Cap = CapabilityBuilder::Capability; std::vector caps = {Cap::DATA, Cap::OPERATIONS, Cap::CONFIGURATIONS, Cap::FAULTS, Cap::LOGS, Cap::BULK_DATA, Cap::CYCLIC_SUBSCRIPTIONS, Cap::TRIGGERS}; + // Relationship endpoints are gated the same way as the top-level URI keys + // above so the three advertising surfaces (top-level URIs, `_links`, + // `capabilities` array) describe the same set of available collections. + if (!app.component_id.empty()) { + caps.push_back(Cap::IS_LOCATED_ON); + caps.push_back(Cap::BELONGS_TO); + } + if (!app.depends_on.empty()) { + caps.push_back(Cap::DEPENDS_ON); + } if (ctx_.node()->get_script_manager() && ctx_.node()->get_script_manager()->has_backend()) { caps.push_back(Cap::SCRIPTS); } @@ -1034,14 +1044,15 @@ void DiscoveryHandlers::handle_app_depends_on(const httplib::Request & req, http std::string app_id = req.matches[1]; - auto validation_result = ctx_.validate_entity_id(app_id); - if (!validation_result) { - HandlerContext::send_error(res, 400, ERR_INVALID_PARAMETER, "Invalid app ID", - {{"details", validation_result.error()}, {"app_id", app_id}}); + // validate_entity_for_route forwards the request to the owning peer when + // the app is remote (aggregation setup) - validate_entity_id alone would + // resolve locally only and return 404 for remote apps. + auto entity_opt = ctx_.validate_entity_for_route(req, res, app_id); + if (!entity_opt) { return; } - // Cache-first lookup: EntityCache has merged entities from peers + // Local entity: fetch full App model for relationship data. const auto & cache = ctx_.node()->get_thread_safe_cache(); auto app_opt = cache.get_app(app_id); if (!app_opt) { @@ -1109,49 +1120,62 @@ void DiscoveryHandlers::handle_app_belongs_to(const httplib::Request & req, http std::string app_id = req.matches[1]; - auto validation_result = ctx_.validate_entity_id(app_id); - if (!validation_result) { - HandlerContext::send_error(res, 400, ERR_INVALID_PARAMETER, "Invalid app ID", - {{"details", validation_result.error()}, {"app_id", app_id}}); + // Forward to peer if app is remote (aggregation setup); locally + // resolved entity falls through to the cache/discovery lookup below. + auto entity_opt = ctx_.validate_entity_for_route(req, res, app_id); + if (!entity_opt) { return; } - // Cache-first lookup: EntityCache has merged entities from peers + // Atomic snapshot avoids a mixed-generation view of App -> Component -> + // Area across concurrent cache refreshes. Fall back to the discovery + // manager (no atomic guarantee, but rare path) when the app is not in + // the cache. const auto & cache = ctx_.node()->get_thread_safe_cache(); - auto app_opt = cache.get_app(app_id); - if (!app_opt) { + auto snapshot = cache.get_app_with_links(app_id); + if (!snapshot.app) { auto discovery = ctx_.node()->get_discovery_manager(); - app_opt = discovery->get_app(app_id); + snapshot.app = discovery->get_app(app_id); + if (snapshot.app && !snapshot.app->component_id.empty()) { + snapshot.component = discovery->get_component(snapshot.app->component_id); + if (snapshot.component && !snapshot.component->area.empty()) { + snapshot.area = discovery->get_area(snapshot.component->area); + } + } } - if (!app_opt) { + if (!snapshot.app) { HandlerContext::send_error(res, 404, ERR_ENTITY_NOT_FOUND, "App not found", {{"app_id", app_id}}); return; } json items = json::array(); - const auto & app = *app_opt; + const auto & app = *snapshot.app; if (!app.component_id.empty()) { - auto component_opt = cache.get_component(app.component_id); - if (!component_opt) { - auto discovery = ctx_.node()->get_discovery_manager(); - component_opt = discovery->get_component(app.component_id); - } - - if (component_opt && !component_opt->area.empty()) { - const auto & area_id = component_opt->area; + if (!snapshot.component) { + // Mirror handle_app_is_located_on: surface broken parent reference + // with x-medkit.missing=true so HATEOAS clients can distinguish + // 'no parent area' from 'manifest broken, component gone'. + json item; + item["id"] = ""; + item["name"] = ""; + item["href"] = ""; + XMedkit ext; + ext.add("missing", true); + ext.add("unresolved_component", app.component_id); + item["x-medkit"] = ext.build(); + items.push_back(item); + RCLCPP_WARN(HandlerContext::logger(), "App '%s' belongs-to unresolvable: parent component '%s' is unknown", + app_id.c_str(), app.component_id.c_str()); + } else if (!snapshot.component->area.empty()) { + const auto & area_id = snapshot.component->area; json item; item["id"] = area_id; item["href"] = "/api/v1/areas/" + area_id; - auto area_opt = cache.get_area(area_id); - if (!area_opt) { - auto discovery = ctx_.node()->get_discovery_manager(); - area_opt = discovery->get_area(area_id); - } - if (area_opt) { - item["name"] = area_opt->name.empty() ? area_id : area_opt->name; + if (snapshot.area) { + item["name"] = snapshot.area->name.empty() ? area_id : snapshot.area->name; } else { item["name"] = area_id; XMedkit ext; @@ -1162,6 +1186,9 @@ void DiscoveryHandlers::handle_app_belongs_to(const httplib::Request & req, http } items.push_back(item); } + // If component resolves but has no area_id assigned, items stays + // empty - that is a legitimate manifest configuration (component + // without parent area), not a broken reference. } json response; @@ -1192,40 +1219,39 @@ void DiscoveryHandlers::handle_app_is_located_on(const httplib::Request & req, h std::string app_id = req.matches[1]; - auto validation_result = ctx_.validate_entity_id(app_id); - if (!validation_result) { - HandlerContext::send_error(res, 400, ERR_INVALID_PARAMETER, "Invalid app ID", - {{"details", validation_result.error()}, {"app_id", app_id}}); + // Forward to peer if app is remote (aggregation setup). + auto entity_opt = ctx_.validate_entity_for_route(req, res, app_id); + if (!entity_opt) { return; } - // Cache-first lookup: EntityCache has merged entities from peers + // Atomic snapshot keeps app -> component consistent across a concurrent + // cache refresh. Fall back to discovery manager when the app is not in + // the cache. const auto & cache = ctx_.node()->get_thread_safe_cache(); - auto app_opt = cache.get_app(app_id); - if (!app_opt) { + auto snapshot = cache.get_app_with_links(app_id); + if (!snapshot.app) { auto discovery = ctx_.node()->get_discovery_manager(); - app_opt = discovery->get_app(app_id); + snapshot.app = discovery->get_app(app_id); + if (snapshot.app && !snapshot.app->component_id.empty()) { + snapshot.component = discovery->get_component(snapshot.app->component_id); + } } - if (!app_opt) { + if (!snapshot.app) { HandlerContext::send_error(res, 404, ERR_ENTITY_NOT_FOUND, "App not found", {{"app_id", app_id}}); return; } json items = json::array(); - const auto & app = *app_opt; + const auto & app = *snapshot.app; if (!app.component_id.empty()) { - auto component_opt = cache.get_component(app.component_id); - if (!component_opt) { - auto discovery = ctx_.node()->get_discovery_manager(); - component_opt = discovery->get_component(app.component_id); - } - if (component_opt) { + if (snapshot.component) { json item; - item["id"] = component_opt->id; - item["name"] = component_opt->name.empty() ? component_opt->id : component_opt->name; - item["href"] = "/api/v1/components/" + component_opt->id; + item["id"] = snapshot.component->id; + item["name"] = snapshot.component->name.empty() ? snapshot.component->id : snapshot.component->name; + item["href"] = "/api/v1/components/" + snapshot.component->id; items.push_back(item); } else { json item; diff --git a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp index ceb6f421..c88cd228 100644 --- a/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/test/test_discovery_handlers.cpp @@ -722,6 +722,39 @@ TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsEmptyWhenAppHasNoCompone EXPECT_EQ(body["_links"]["app"], "/api/v1/apps/standalone"); } +// @verifies REQ_INTEROP_106 +TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsMissingItemWhenParentComponentUnresolved) { + // Mirror of AppIsLocatedOnReturnsMissingItemWhenHostComponentUnresolved: surface a broken + // parent reference instead of silently returning items=[]. Lets HATEOAS clients tell + // 'app has no parent area' from 'manifest broken, component gone'. + auto & cache = const_cast(suite_node_->get_thread_safe_cache()); + auto apps = cache.get_apps(); + ASSERT_FALSE(apps.empty()); + + bool updated = false; + for (auto & app : apps) { + if (app.id == "mapper") { + app.component_id = "ghost_component"; + updated = true; + break; + } + } + ASSERT_TRUE(updated); + cache.update_apps(apps); + + auto req = make_request_with_match("/api/v1/apps/mapper/belongs-to", R"(/api/v1/apps/([^/]+)/belongs-to)"); + httplib::Response res; + + handlers_->handle_app_belongs_to(req, res); + + auto body = parse_json(res); + ASSERT_EQ(body["items"].size(), 1); + EXPECT_EQ(body["items"][0]["x-medkit"]["missing"], true); + EXPECT_EQ(body["items"][0]["x-medkit"]["unresolved_component"], "ghost_component"); + EXPECT_EQ(body["items"][0]["href"], ""); + EXPECT_EQ(body["x-medkit"]["total_count"], 1); +} + // @verifies REQ_INTEROP_106 TEST_F(DiscoveryHandlersFixtureTest, AppBelongsToReturnsEmptyWhenComponentHasNoArea) { // Strip the parent component's area assignment so the chain App->Component->Area breaks. From 97c654c82b8e5aa4dd7f07b30ecd0a2d223c75d1 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 17:27:45 +0200 Subject: [PATCH 10/16] fix(gateway): align belongs-to advertising and add atomic entity snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GET /apps/{id} exposes the available subresources on three parallel surfaces (top-level URI references, _links, and the capabilities array); the belongs-to addition only updated two of them. App::to_capabilities, used by notifications / snapshots / change events, also lacked the link. Brought all four sites into agreement by adding BELONGS_TO and IS_LOCATED_ON entries to CapabilityBuilder::Capability (IS_LOCATED_ON was already missing - boy-scouted along with the new one), pushing them into the caps vector under the same component_id gate as the existing URI / _links emission, and adding belongs-to to App::to_capabilities. SOVD ISO 17978-3 §7.6 defines /belongs-to only for /apps. The previous EntityCapabilities table advertised belongs-to for SERVER and COMPONENT too, so supports_resource("belongs-to") returned true on those types and clients hit 404 when following the advertised resource. Dropped both entries from resources_; SERVER and COMPONENT keep their other relationship resources. Add an atomic ThreadSafeEntityCache::get_app_with_links() that resolves an App together with its parent Component and Area under a single shared_lock. Three sequential get_* calls each take a fresh shared_lock, which lets a writer refresh advance the generation between them and return a mixed-generation view (app from gen N, component from N+1, area from N). The handler refactor is in the previous commit; this one adds the method and its unit test coverage via the handler suite. Document on docs/api/rest.rst that standalone apps deliberately do not expose the belongs-to URI - SOVD §7.6 says "Each URI reference is only present if the entity supports that collection". HATEOAS-only clients must rely on absence of the link. Switch test_hateoas integration tests for the 200 paths to self.poll_endpoint(), which adds retry under sanitizer / CI load instead of the raw requests.get(timeout=10) used before. Update the 404 shape to the generic 'Entity not found' / parameters.entity_id payload emitted by validate_entity_for_route (the relationship handlers now go through it so remote apps in an aggregation cluster get forwarded). --- docs/api/rest.rst | 11 +++++- .../core/http/handlers/capability_builder.hpp | 2 ++ .../core/models/thread_safe_entity_cache.hpp | 22 ++++++++++++ .../src/core/discovery/models/app.cpp | 1 + .../core/http/handlers/capability_builder.cpp | 4 +++ .../src/core/models/entity_capabilities.cpp | 14 +++++--- .../core/models/thread_safe_entity_cache.cpp | 34 +++++++++++++++++++ .../test/test_discovery_models.cpp | 27 +++++++++++++++ .../test/features/test_hateoas.test.py | 33 ++++++++---------- 9 files changed, 124 insertions(+), 24 deletions(-) diff --git a/docs/api/rest.rst b/docs/api/rest.rst index 154e5c27..13f5c0ad 100644 --- a/docs/api/rest.rst +++ b/docs/api/rest.rst @@ -280,13 +280,22 @@ Apps ``GET /api/v1/apps/{app_id}/belongs-to`` Return the area that contains this app via its parent component. + Per SOVD (ISO 17978-3 §7.6, see :ref:`sovd-api-discovery`), the corresponding + ``belongs-to`` URI reference in ``GET /apps/{app_id}`` is only emitted when + the app has a parent component (i.e. is not standalone). Standalone apps do + not expose this subresource in HATEOAS and the endpoint will return an empty + ``items`` collection if called directly. + The response follows the standard ``items`` wrapper and returns: - - ``0`` items when the app has no associated host component + - ``0`` items when the app has no associated host component (standalone app) - ``0`` items when the parent component has no assigned area - ``1`` item when the area is resolved - ``1`` item with ``x-medkit.missing=true`` when the parent component references an area that cannot currently be resolved + - ``1`` item with ``x-medkit.missing=true`` and ``x-medkit.unresolved_component`` + set to the dangling component id when the app references a parent component + that cannot currently be resolved (manifest broken / component removed) **Example Response:** diff --git a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/capability_builder.hpp b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/capability_builder.hpp index 02040479..b40cb143 100644 --- a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/capability_builder.hpp +++ b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/http/handlers/capability_builder.hpp @@ -52,6 +52,8 @@ class CapabilityBuilder { RELATED_APPS, ///< Entity has related apps (components only) HOSTS, ///< Entity has host apps (functions/components) DEPENDS_ON, ///< Entity has dependencies (components only) + IS_LOCATED_ON, ///< Entity has parent component (apps only) + BELONGS_TO, ///< Entity has parent area (apps only) LOGS, ///< Entity has application log entries (components and apps) BULK_DATA, ///< Entity has bulk data endpoints (rosbags) CYCLIC_SUBSCRIPTIONS, ///< Entity has cyclic subscription endpoints diff --git a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp index 0e8c7124..b2b9679d 100644 --- a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp +++ b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp @@ -208,6 +208,28 @@ class ThreadSafeEntityCache { std::optional get_app(const std::string & id) const; std::optional get_function(const std::string & id) const; + /// Atomic snapshot of an App together with its parent Component and Area. + /// + /// Three sequential get_app/get_component/get_area calls each acquire a + /// fresh shared_lock and a writer refresh can advance the generation + /// between them, so handlers that traverse App -> Component -> Area can + /// observe a mixed-generation view (e.g. app from gen N, component from + /// N+1, area from N). This helper resolves the chain under a single + /// shared_lock so the result is internally consistent. + /// + /// `app` is empty if `app_id` is not in the cache. `component` is empty if + /// the app has no `component_id` or the referenced component is missing. + /// `area` is empty if the component has no `area` or the referenced area + /// is missing. The two latter cases are distinguishable from "no parent" + /// because `app.component_id` / `component.area` remain set on the + /// returned models. + struct AppLinksSnapshot { + std::optional app; + std::optional component; + std::optional area; + }; + AppLinksSnapshot get_app_with_links(const std::string & id) const; + // --- Check existence (O(1)) --- bool has_area(const std::string & id) const; bool has_component(const std::string & id) const; diff --git a/src/ros2_medkit_gateway/src/core/discovery/models/app.cpp b/src/ros2_medkit_gateway/src/core/discovery/models/app.cpp index 2048d8af..5eff4cd8 100644 --- a/src/ros2_medkit_gateway/src/core/discovery/models/app.cpp +++ b/src/ros2_medkit_gateway/src/core/discovery/models/app.cpp @@ -125,6 +125,7 @@ json App::to_capabilities(const std::string & base_url) const { // Relationships (SOVD standard) if (!component_id.empty()) { j["is-located-on"] = base_url + "/components/" + component_id; + j["belongs-to"] = app_base + "/belongs-to"; } if (!depends_on.empty()) { j["depends-on"] = app_base + "/depends-on"; diff --git a/src/ros2_medkit_gateway/src/core/http/handlers/capability_builder.cpp b/src/ros2_medkit_gateway/src/core/http/handlers/capability_builder.cpp index bc8448d7..43adf2a3 100644 --- a/src/ros2_medkit_gateway/src/core/http/handlers/capability_builder.cpp +++ b/src/ros2_medkit_gateway/src/core/http/handlers/capability_builder.cpp @@ -41,6 +41,10 @@ std::string CapabilityBuilder::capability_to_name(Capability cap) { return "hosts"; case Capability::DEPENDS_ON: return "depends-on"; + case Capability::IS_LOCATED_ON: + return "is-located-on"; + case Capability::BELONGS_TO: + return "belongs-to"; case Capability::LOGS: return "logs"; case Capability::BULK_DATA: diff --git a/src/ros2_medkit_gateway/src/core/models/entity_capabilities.cpp b/src/ros2_medkit_gateway/src/core/models/entity_capabilities.cpp index 99dad755..ee8a8948 100644 --- a/src/ros2_medkit_gateway/src/core/models/entity_capabilities.cpp +++ b/src/ros2_medkit_gateway/src/core/models/entity_capabilities.cpp @@ -29,8 +29,11 @@ EntityCapabilities EntityCapabilities::for_type(SovdEntityType type) { ResourceCollection::LOGS, ResourceCollection::TRIGGERS, ResourceCollection::SCRIPTS, ResourceCollection::UPDATES, }; - // SERVER resources - caps.resources_ = {"docs", "version-info", "logs", "belongs-to", "depends-on", "data-categories", "data-groups"}; + // SERVER resources. SOVD (ISO 17978-3 §7.6) does not define + // /belongs-to for server, only for apps - advertising it here would + // make supports_resource("belongs-to") return true and clients would + // get 404 when following it. + caps.resources_ = {"docs", "version-info", "logs", "depends-on", "data-categories", "data-groups"}; break; case SovdEntityType::AREA: @@ -56,8 +59,11 @@ EntityCapabilities EntityCapabilities::for_type(SovdEntityType type) { ResourceCollection::LOGS, ResourceCollection::TRIGGERS, ResourceCollection::SCRIPTS, ResourceCollection::UPDATES, }; - caps.resources_ = {"docs", "logs", "hosts", "belongs-to", - "depends-on", "subcomponents", "data-categories", "data-groups"}; + // SOVD (ISO 17978-3 §7.6) defines /belongs-to only for apps; component + // exposes parent area via /is-located-on (which is itself app-only in + // the spec, but ros2_medkit treats it as the canonical area pointer). + // Listing belongs-to here would be a 404 promise. + caps.resources_ = {"docs", "logs", "hosts", "depends-on", "subcomponents", "data-categories", "data-groups"}; break; case SovdEntityType::APP: diff --git a/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp b/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp index c1947b17..fa008e74 100644 --- a/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp +++ b/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp @@ -159,6 +159,40 @@ std::optional ThreadSafeEntityCache::get_app(const std::string & id) const return std::nullopt; } +ThreadSafeEntityCache::AppLinksSnapshot ThreadSafeEntityCache::get_app_with_links(const std::string & id) const { + AppLinksSnapshot snapshot; + std::shared_lock lock(mutex_); + + auto app_it = app_index_.find(id); + if (app_it == app_index_.end() || app_it->second >= apps_.size()) { + return snapshot; + } + snapshot.app = apps_[app_it->second]; + + const auto & component_id = snapshot.app->component_id; + if (component_id.empty()) { + return snapshot; + } + + auto comp_it = component_index_.find(component_id); + if (comp_it == component_index_.end() || comp_it->second >= components_.size()) { + return snapshot; // component referenced but missing - leave .component empty + } + snapshot.component = components_[comp_it->second]; + + const auto & area_id = snapshot.component->area; + if (area_id.empty()) { + return snapshot; + } + + auto area_it = area_index_.find(area_id); + if (area_it == area_index_.end() || area_it->second >= areas_.size()) { + return snapshot; // area referenced but missing + } + snapshot.area = areas_[area_it->second]; + return snapshot; +} + std::optional ThreadSafeEntityCache::get_function(const std::string & id) const { std::shared_lock lock(mutex_); auto it = function_index_.find(id); diff --git a/src/ros2_medkit_gateway/test/test_discovery_models.cpp b/src/ros2_medkit_gateway/test/test_discovery_models.cpp index 06fe0bb2..0d1c600d 100644 --- a/src/ros2_medkit_gateway/test/test_discovery_models.cpp +++ b/src/ros2_medkit_gateway/test/test_discovery_models.cpp @@ -250,6 +250,33 @@ TEST_F(AppModelTest, ToJson_ExternalWhenTrue) { EXPECT_EQ(j["x-medkit"]["external"], true); } +// @verifies REQ_INTEROP_105 REQ_INTEROP_106 +TEST_F(AppModelTest, ToCapabilities_ContainsRelationshipLinks) { + json j = app_.to_capabilities("http://localhost:8080/api/v1"); + + // Both is-located-on and belongs-to are gated on the same component_id + // condition - handler emits the same set in handle_get_app, so + // to_capabilities must stay in sync for notification and snapshot + // consumers that go through the model. + EXPECT_EQ(j["is-located-on"], "http://localhost:8080/api/v1/components/navigation_server"); + EXPECT_EQ(j["belongs-to"], "http://localhost:8080/api/v1/apps/nav2/belongs-to"); + EXPECT_EQ(j["depends-on"], "http://localhost:8080/api/v1/apps/nav2/depends-on"); +} + +TEST_F(AppModelTest, ToCapabilities_OmitsRelationshipLinksWhenAppIsStandalone) { + App standalone; + standalone.id = "standalone"; + standalone.name = "Standalone"; + standalone.source = "manifest"; + // No component_id, no depends_on - mirrors discovery_handlers.cpp gating. + + json j = standalone.to_capabilities("http://localhost:8080/api/v1"); + + EXPECT_FALSE(j.contains("is-located-on")); + EXPECT_FALSE(j.contains("belongs-to")); + EXPECT_FALSE(j.contains("depends-on")); +} + TEST_F(AppModelTest, ToJson_OmitsEmptyOptionalFields) { App minimal; minimal.id = "test"; diff --git a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py index e758d9b9..48583a64 100644 --- a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py +++ b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py @@ -415,21 +415,20 @@ def test_depends_on_apps_nonexistent(self): data = response.json() self.assertIn('error_code', data) - self.assertEqual(data['message'], 'App not found') + # validate_entity_for_route emits the generic Entity not found shape + # so the gateway can also forward unknown apps to peers when running + # in an aggregation cluster. + self.assertEqual(data['message'], 'Entity not found') self.assertIn('parameters', data) - self.assertIn('app_id', data['parameters']) - self.assertEqual(data['parameters'].get('app_id'), 'nonexistent_app') + self.assertEqual(data['parameters'].get('entity_id'), 'nonexistent_app') def test_is_located_on_apps_has_href(self): """GET /apps/{id}/is-located-on returns 0-or-1 component hrefs.""" # @verifies REQ_INTEROP_105 - response = requests.get( - f'{self.BASE_URL}/apps/engine-temp-sensor/is-located-on', - timeout=10 - ) - self.assertEqual(response.status_code, 200) + # poll_endpoint adds retry under sanitizer/CI load - the gateway may + # take a moment to discover entities after launch. + data = self.poll_endpoint('/apps/engine-temp-sensor/is-located-on') - data = response.json() self.assertIn('items', data) self.assertIn('_links', data) self.assertEqual(data['_links']['self'], '/api/v1/apps/engine-temp-sensor/is-located-on') @@ -457,20 +456,16 @@ def test_is_located_on_apps_nonexistent(self): data = response.json() self.assertIn('error_code', data) - self.assertEqual(data['message'], 'App not found') + self.assertEqual(data['message'], 'Entity not found') self.assertIn('parameters', data) - self.assertEqual(data['parameters'].get('app_id'), 'nonexistent_app') + self.assertEqual(data['parameters'].get('entity_id'), 'nonexistent_app') def test_belongs_to_apps_returns_parent_area(self): """GET /apps/{id}/belongs-to returns the parent area via component.""" # @verifies REQ_INTEROP_106 - response = requests.get( - f'{self.BASE_URL}/apps/engine-temp-sensor/belongs-to', - timeout=10 - ) - self.assertEqual(response.status_code, 200) + # poll_endpoint adds retry under sanitizer/CI load. + data = self.poll_endpoint('/apps/engine-temp-sensor/belongs-to') - data = response.json() self.assertIn('items', data) self.assertIn('_links', data) self.assertEqual(data['_links']['self'], '/api/v1/apps/engine-temp-sensor/belongs-to') @@ -495,9 +490,9 @@ def test_belongs_to_apps_nonexistent(self): data = response.json() self.assertIn('error_code', data) - self.assertEqual(data['message'], 'App not found') + self.assertEqual(data['message'], 'Entity not found') self.assertIn('parameters', data) - self.assertEqual(data['parameters'].get('app_id'), 'nonexistent_app') + self.assertEqual(data['parameters'].get('entity_id'), 'nonexistent_app') # ------------------------------------------------------------------ # Functions (test_81) From c71c73b9dbe088e178589edb63860c001df98245 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 18:12:20 +0200 Subject: [PATCH 11/16] docs(api): remove cross-project sphinx ref from belongs-to description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The :ref:`sovd-api-discovery` target lives in the separate knowledge/ Sphinx project and is not reachable from ros2_medkit's docs build, so sphinx-build -W rejected it as an undefined label. Keep the ISO 17978-3 §7.6 citation in prose; consumers can look the section up directly. --- docs/api/rest.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/rest.rst b/docs/api/rest.rst index 13f5c0ad..ee21d7e4 100644 --- a/docs/api/rest.rst +++ b/docs/api/rest.rst @@ -280,7 +280,7 @@ Apps ``GET /api/v1/apps/{app_id}/belongs-to`` Return the area that contains this app via its parent component. - Per SOVD (ISO 17978-3 §7.6, see :ref:`sovd-api-discovery`), the corresponding + Per SOVD (ISO 17978-3 §7.6), the corresponding ``belongs-to`` URI reference in ``GET /apps/{app_id}`` is only emitted when the app has a parent component (i.e. is not standalone). Standalone apps do not expose this subresource in HATEOAS and the endpoint will return an empty From dfc4dae206b1d01be36a76a919d22ca150cc03c7 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 19:37:04 +0200 Subject: [PATCH 12/16] fix(gateway): atomic snapshot for app dependencies + integration edge cases handle_app_depends_on iterates app.depends_on and resolves each entry via cache.get_app(). Each call takes a fresh shared_lock, which lets a writer refresh advance the cache generation between the app fetch and the N per-dependency fetches - a 5-dependency app could return 5 dependencies from 5 different generations. Add ThreadSafeEntityCache::get_app_with_dependencies() that resolves the app plus every entry in its depends_on under a single shared_lock, and switch the handler to use it. Discovery-manager fallback preserves the old shape (no atomic guarantee, but rare path used when the entity isn't cached yet). Seed the demo manifest with three HATEOAS edge-case fixtures (hateoas-edge-area, hateoas-component-no-area, hateoas-app-standalone, hateoas-app-on-area-less-component) and add the matching integration tests so the standalone-app and component-without-area branches of /belongs-to and /is-located-on are exercised end-to-end, not just in unit tests. The 'unresolved component' and 'dangling area' branches are not seeded in the integration manifest because validators R007 (app -> missing component) and R006 (component -> missing area) reject the manifest and trigger a fallback to runtime_only discovery, which disables the rest of the suite. Both branches remain covered by the unit tests AppBelongsToReturnsMissingItemWhenParentComponentUnresolved and AppBelongsToReturnsMissingItemWhenAreaUnresolved. --- .../config/examples/demo_nodes_manifest.yaml | 44 +++++++++++++++++++ .../core/models/thread_safe_entity_cache.hpp | 18 ++++++++ .../core/models/thread_safe_entity_cache.cpp | 23 ++++++++++ .../src/http/handlers/discovery_handlers.cpp | 23 ++++++---- .../test/features/test_hateoas.test.py | 44 +++++++++++++++++++ 5 files changed, 143 insertions(+), 9 deletions(-) diff --git a/src/ros2_medkit_gateway/config/examples/demo_nodes_manifest.yaml b/src/ros2_medkit_gateway/config/examples/demo_nodes_manifest.yaml index f7f01133..7947a315 100644 --- a/src/ros2_medkit_gateway/config/examples/demo_nodes_manifest.yaml +++ b/src/ros2_medkit_gateway/config/examples/demo_nodes_manifest.yaml @@ -74,6 +74,14 @@ areas: namespace: /perception/lidar description: "LiDAR sensor system" + # Isolated area used by HATEOAS edge-case manifest entries below; not bound + # to a running ROS 2 node, exists only to give the unresolved/standalone/ + # area-less tests deterministic targets. + - id: hateoas-edge-area + name: "HATEOAS Edge Area" + namespace: /hateoas_edge + description: "Reserved for integration test edge cases" + # ============================================================================= # COMPONENTS - Hardware units # ============================================================================= @@ -135,6 +143,20 @@ components: name: "LiDAR Unit" type: "sensor" area: lidar + + # HATEOAS edge-case fixtures (manifest-only, no runtime nodes). + # See test_hateoas.test.py for the corresponding test cases. + - id: hateoas-component-no-area + name: "HATEOAS Edge: component without area" + type: "sensor" + # area intentionally omitted - exercises the + # 'parent component resolved but has no area_id' branch. + + # Note: the 'dangling area reference' branch (component points at a + # non-existent area id) is covered by unit test + # AppBelongsToReturnsMissingItemWhenAreaUnresolved. We can't seed a + # component with a missing area_id here because manifest validator R006 + # rejects it and disables manifest-driven discovery for the whole suite. description: "360° laser scanner with fault reporting" # ============================================================================= @@ -231,6 +253,28 @@ apps: node_name: lidar_sensor namespace: /perception/lidar + # === HATEOAS edge-case apps (manifest-only, no ros_binding) === + # Each targets a specific branch of handle_app_belongs_to / + # handle_app_is_located_on so the integration suite exercises the same + # shapes the unit tests cover. + - id: hateoas-app-standalone + name: "HATEOAS Edge: standalone app" + category: "fixture" + description: "App without a parent component (is_located_on omitted)" + + # Note: the 'unresolved parent component' branch is covered by unit test + # AppBelongsToReturnsMissingItemWhenParentComponentUnresolved instead - the + # manifest validator (R007) rejects apps that reference a non-existent + # component even with manifest_strict_validation=false, so we can't seed it + # here as an integration fixture without disabling discovery for the rest + # of the suite. + + - id: hateoas-app-on-area-less-component + name: "HATEOAS Edge: app on component without area" + category: "fixture" + description: "Parent component resolves but has no area_id assigned" + is_located_on: hateoas-component-no-area + # ============================================================================= # FUNCTIONS - High-level capabilities # ============================================================================= diff --git a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp index b2b9679d..7bb1f8d3 100644 --- a/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp +++ b/src/ros2_medkit_gateway/include/ros2_medkit_gateway/core/models/thread_safe_entity_cache.hpp @@ -230,6 +230,24 @@ class ThreadSafeEntityCache { }; AppLinksSnapshot get_app_with_links(const std::string & id) const; + /// Atomic snapshot of an App together with its declared `depends_on` apps. + /// + /// `handle_app_depends_on` iterates `app.depends_on` and resolves each id + /// via `get_app()` - one shared_lock per dependency. A writer refresh can + /// land between the app fetch and any of the per-dependency fetches, so a + /// 5-dependency app can return 5 apps from 5 different cache generations. + /// This helper takes a single shared_lock and returns the app together + /// with every dependency resolved in the same generation. + /// + /// `app` is empty if `app_id` is not in the cache. `dependencies` lists + /// every entry in `app->depends_on` in declaration order; the optional is + /// empty when the referenced dependency cannot be resolved (broken ref). + struct AppDependenciesSnapshot { + std::optional app; + std::vector>> dependencies; + }; + AppDependenciesSnapshot get_app_with_dependencies(const std::string & id) const; + // --- Check existence (O(1)) --- bool has_area(const std::string & id) const; bool has_component(const std::string & id) const; diff --git a/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp b/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp index fa008e74..4b3316f2 100644 --- a/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp +++ b/src/ros2_medkit_gateway/src/core/models/thread_safe_entity_cache.cpp @@ -193,6 +193,29 @@ ThreadSafeEntityCache::AppLinksSnapshot ThreadSafeEntityCache::get_app_with_link return snapshot; } +ThreadSafeEntityCache::AppDependenciesSnapshot +ThreadSafeEntityCache::get_app_with_dependencies(const std::string & id) const { + AppDependenciesSnapshot snapshot; + std::shared_lock lock(mutex_); + + auto app_it = app_index_.find(id); + if (app_it == app_index_.end() || app_it->second >= apps_.size()) { + return snapshot; + } + snapshot.app = apps_[app_it->second]; + + snapshot.dependencies.reserve(snapshot.app->depends_on.size()); + for (const auto & dep_id : snapshot.app->depends_on) { + auto dep_it = app_index_.find(dep_id); + if (dep_it != app_index_.end() && dep_it->second < apps_.size()) { + snapshot.dependencies.emplace_back(dep_id, apps_[dep_it->second]); + } else { + snapshot.dependencies.emplace_back(dep_id, std::nullopt); + } + } + return snapshot; +} + std::optional ThreadSafeEntityCache::get_function(const std::string & id) const { std::shared_lock lock(mutex_); auto it = function_index_.find(id); diff --git a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp index d884c2db..4541456c 100644 --- a/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp +++ b/src/ros2_medkit_gateway/src/http/handlers/discovery_handlers.cpp @@ -1052,28 +1052,33 @@ void DiscoveryHandlers::handle_app_depends_on(const httplib::Request & req, http return; } - // Local entity: fetch full App model for relationship data. + // Atomic snapshot keeps the app + every dependency resolved in the same + // cache generation; a writer refresh between per-dependency lookups could + // otherwise yield a mix from N different generations. const auto & cache = ctx_.node()->get_thread_safe_cache(); - auto app_opt = cache.get_app(app_id); - if (!app_opt) { + auto snapshot = cache.get_app_with_dependencies(app_id); + if (!snapshot.app) { auto discovery = ctx_.node()->get_discovery_manager(); - app_opt = discovery->get_app(app_id); + snapshot.app = discovery->get_app(app_id); + if (snapshot.app) { + snapshot.dependencies.reserve(snapshot.app->depends_on.size()); + for (const auto & dep_id : snapshot.app->depends_on) { + snapshot.dependencies.emplace_back(dep_id, discovery->get_app(dep_id)); + } + } } - if (!app_opt) { + if (!snapshot.app) { HandlerContext::send_error(res, 404, ERR_ENTITY_NOT_FOUND, "App not found", {{"app_id", app_id}}); return; } - const auto & app = *app_opt; - json items = json::array(); - for (const auto & dep_id : app.depends_on) { + for (const auto & [dep_id, dep_opt] : snapshot.dependencies) { json item; item["id"] = dep_id; item["href"] = "/api/v1/apps/" + dep_id; - auto dep_opt = cache.get_app(dep_id); if (dep_opt) { item["name"] = dep_opt->name.empty() ? dep_id : dep_opt->name; diff --git a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py index 48583a64..549ee44c 100644 --- a/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py +++ b/src/ros2_medkit_integration_tests/test/features/test_hateoas.test.py @@ -494,6 +494,50 @@ def test_belongs_to_apps_nonexistent(self): self.assertIn('parameters', data) self.assertEqual(data['parameters'].get('entity_id'), 'nonexistent_app') + # ------------------------------------------------------------------ + # HATEOAS edge cases: standalone / broken-ref / missing-area apps. + # Fixtures defined in demo_nodes_manifest.yaml (hateoas-app-*). + # ------------------------------------------------------------------ + + def test_belongs_to_standalone_app_returns_empty(self): + """Standalone app (no parent component) returns 200 with empty items. + + @verifies REQ_INTEROP_106 + """ + data = self.poll_endpoint('/apps/hateoas-app-standalone/belongs-to') + self.assertEqual(data['items'], []) + self.assertEqual(data['x-medkit']['total_count'], 0) + + def test_belongs_to_app_on_area_less_component_returns_empty(self): + """Component without area assignment returns 200 with empty items. + + @verifies REQ_INTEROP_106 + """ + data = self.poll_endpoint('/apps/hateoas-app-on-area-less-component/belongs-to') + self.assertEqual(data['items'], []) + self.assertEqual(data['x-medkit']['total_count'], 0) + + # Note: the 'unresolved parent component' branch on /belongs-to and + # /is-located-on is covered by the unit tests + # AppBelongsToReturnsMissingItemWhenParentComponentUnresolved and + # AppIsLocatedOnReturnsMissingItemWhenHostComponentUnresolved instead - + # the manifest validator rejects an app that points at a non-existent + # component, so we can't seed that scenario as an integration fixture. + + # Note: the 'dangling area reference' branch is covered by unit test + # AppBelongsToReturnsMissingItemWhenAreaUnresolved - manifest validator + # R006 rejects a component that points at a non-existent area, so we + # can't seed that scenario as an integration fixture without disabling + # manifest-driven discovery for the whole suite. + + def test_is_located_on_standalone_app_returns_empty(self): + """Standalone app returns 200 with empty items on /is-located-on too. + + @verifies REQ_INTEROP_105 + """ + data = self.poll_endpoint('/apps/hateoas-app-standalone/is-located-on') + self.assertEqual(data['items'], []) + # ------------------------------------------------------------------ # Functions (test_81) # ------------------------------------------------------------------ From ee86ef3131819d3618761daf799b64f7b997bf2a Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 20:12:35 +0200 Subject: [PATCH 13/16] test(integration): account for HATEOAS edge-case manifest fixtures in count assertions The new hateoas-edge-area, hateoas-component-no-area, hateoas-app-standalone and hateoas-app-on-area-less-component manifest entries (added to support the standalone-app / area-less integration tests in test_hateoas) tripped the exact-count assertions in test_hybrid_suppression and the allow-list-style checks in test_discovery_gap_fill. Both suites load the same demo_nodes_manifest.yaml. Extend MANIFEST_ALL_AREAS, MANIFEST_TOP_LEVEL_AREAS, MANIFEST_COMPONENTS and MANIFEST_APPS in test_hybrid_suppression to include the new ids, plus add MANIFEST_APPS_WITH_BINDING so the test_manifest_apps_are_online check skips the manifest-only fixtures (no ros_binding means they never reach is_online=True). Mirror the allow-list updates in test_discovery_gap_fill. --- .../features/test_discovery_gap_fill.test.py | 4 ++++ .../features/test_hybrid_suppression.test.py | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/ros2_medkit_integration_tests/test/features/test_discovery_gap_fill.test.py b/src/ros2_medkit_integration_tests/test/features/test_discovery_gap_fill.test.py index 48070f53..bda3db88 100644 --- a/src/ros2_medkit_integration_tests/test/features/test_discovery_gap_fill.test.py +++ b/src/ros2_medkit_integration_tests/test/features/test_discovery_gap_fill.test.py @@ -92,6 +92,8 @@ def test_only_manifest_areas_present(self): for area_id in area_ids: self.assertIn(area_id, [ 'powertrain', 'chassis', 'body', 'perception', + # HATEOAS edge-case fixture, manifest-only. + 'hateoas-edge-area', ], f"Unexpected area found in top-level listing: {area_id}") def test_only_manifest_components_present(self): @@ -109,6 +111,8 @@ def test_only_manifest_components_present(self): 'brake-ecu', 'brake-pressure-sensor-hw', 'brake-actuator-hw', 'door-sensor-hw', 'light-module', 'lidar-unit', + # HATEOAS edge-case fixture (component without area assignment). + 'hateoas-component-no-area', ] for comp_id in component_ids: self.assertIn( diff --git a/src/ros2_medkit_integration_tests/test/features/test_hybrid_suppression.test.py b/src/ros2_medkit_integration_tests/test/features/test_hybrid_suppression.test.py index 891f627b..fe9ceb47 100644 --- a/src/ros2_medkit_integration_tests/test/features/test_hybrid_suppression.test.py +++ b/src/ros2_medkit_integration_tests/test/features/test_hybrid_suppression.test.py @@ -58,21 +58,34 @@ MANIFEST_ALL_AREAS = { 'powertrain', 'engine', 'chassis', 'brakes', 'body', 'door', 'front-left-door', 'lights', 'perception', 'lidar', + # HATEOAS edge-case fixture (manifest-only) + 'hateoas-edge-area', } # Only top-level areas appear in GET /areas; subareas are filtered MANIFEST_TOP_LEVEL_AREAS = { 'powertrain', 'chassis', 'body', 'perception', + 'hateoas-edge-area', } MANIFEST_SUBAREAS = MANIFEST_ALL_AREAS - MANIFEST_TOP_LEVEL_AREAS MANIFEST_COMPONENTS = { 'engine-ecu', 'temp-sensor-hw', 'rpm-sensor-hw', 'brake-ecu', 'brake-pressure-sensor-hw', 'brake-actuator-hw', 'door-sensor-hw', 'light-module', 'lidar-unit', + # HATEOAS edge-case fixture (component without area assignment) + 'hateoas-component-no-area', } MANIFEST_APPS = { 'engine-temp-sensor', 'engine-rpm-sensor', 'engine-calibration-service', 'engine-long-calibration', 'brake-pressure-sensor', 'brake-actuator', 'door-status-sensor', 'light-controller', 'lidar-sensor', + # HATEOAS edge-case fixtures (manifest-only, no ros_binding) + 'hateoas-app-standalone', 'hateoas-app-on-area-less-component', +} +# Subset that has a ros_binding and therefore can be is_online once the +# runtime nodes are launched. Manifest-only fixtures (no binding) are +# present in the cache but never reach online state. +MANIFEST_APPS_WITH_BINDING = MANIFEST_APPS - { + 'hateoas-app-standalone', 'hateoas-app-on-area-less-component', } MANIFEST_FUNCTIONS = { 'engine-monitoring', 'engine-calibration', 'brake-system', @@ -260,6 +273,11 @@ def test_manifest_apps_are_online(self): Even with allow_heuristic_apps=false (gap-fill blocking runtime apps from the entity tree), the linker must still receive the unfiltered runtime apps so it can bind manifest apps to live nodes. + + Apps in ``MANIFEST_APPS_WITH_BINDING`` must be online; manifest-only + fixtures (no ros_binding) are exempt - they are present in the cache + but never reach online state because no runtime node was launched + for them. """ # @verifies REQ_INTEROP_003 data = self.poll_endpoint_until( @@ -267,10 +285,13 @@ def test_manifest_apps_are_online(self): lambda d: d if all( a.get('x-medkit', {}).get('is_online', False) for a in d.get('items', []) + if a['id'] in MANIFEST_APPS_WITH_BINDING ) and len(d.get('items', [])) == len(MANIFEST_APPS) else None, timeout=30.0, ) for app in data['items']: + if app['id'] not in MANIFEST_APPS_WITH_BINDING: + continue x_medkit = app.get('x-medkit', {}) is_online = x_medkit.get('is_online', False) self.assertTrue( From c220212a9cb1faa525c324a0dce1972386dd42e3 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 20:40:12 +0200 Subject: [PATCH 14/16] test(integration): extend manifest_only allow-list with HATEOAS edge fixtures test_scenario_discovery_manifest::test_25_no_topic_components_in_manifest_only hard-codes the set of expected component IDs and rejects any others, so the hateoas-component-no-area fixture (added in dfc4dae2 to support the new integration tests for the standalone / area-less branches) tripped it under manifest_only mode. Add the fixture id to the allow-list. --- .../test/scenarios/test_scenario_discovery_manifest.test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ros2_medkit_integration_tests/test/scenarios/test_scenario_discovery_manifest.test.py b/src/ros2_medkit_integration_tests/test/scenarios/test_scenario_discovery_manifest.test.py index 7aea73fe..2718ddc9 100644 --- a/src/ros2_medkit_integration_tests/test/scenarios/test_scenario_discovery_manifest.test.py +++ b/src/ros2_medkit_integration_tests/test/scenarios/test_scenario_discovery_manifest.test.py @@ -382,6 +382,8 @@ def test_25_no_topic_components_in_manifest_only(self): 'engine-ecu', 'temp-sensor-hw', 'rpm-sensor-hw', 'brake-ecu', 'brake-pressure-sensor-hw', 'brake-actuator-hw', 'door-sensor-hw', 'light-module', 'lidar-unit', + # HATEOAS edge-case fixture (component without area assignment). + 'hateoas-component-no-area', } data = self.get_json('/components') actual_ids = {c['id'] for c in data['items']} From 763888a65d57b92ee11ec9a3780e2f3f88e64abe Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 21:37:33 +0200 Subject: [PATCH 15/16] ci(quality): build ASan with RelWithDebInfo and free more container disk ASan job hit 'ld: final link failed: No space left on device' three runs in a row on the c220212a CI cycle, deterministically aborting at the test_plugin_entity_routing / test_plugin_manager link step. The github-hosted runner gives the container ~14 GB on /, and the combination of Debug (-O0 -g) plus ASan + UBSan instrumentation produces enough object-file/debug-info bloat to exhaust that even with ccache warm. - Switch -DCMAKE_BUILD_TYPE=Debug to -DCMAKE_BUILD_TYPE=RelWithDebInfo for sanitizer-asan, matching what sanitizer-tsan already uses. The ROS2MedkitSanitizers cmake module overrides optimisation to -O1 in both cases, so this only drops the -O0 expansion and keeps debug info. ASan/UBSan stack traces stay readable. - Extend the Free disk space step with /var/cache/apt, /var/lib/apt/lists, /usr/share/doc, /usr/share/man, /usr/share/locale cleanup inside the container, plus df -h before and after the build for diagnostics. No source change - the test/build behaviour is preserved; only the disk footprint shrinks. --- .github/workflows/quality.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 415ec189..d964cab4 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -231,6 +231,12 @@ jobs: run: | rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc || true apt-get clean + # Container-level cleanup so the ASan build (large debug-info + # binaries) has room on the overlay filesystem. The host / + # is only ~14 GB on github-hosted runners. + rm -rf /var/cache/apt/* /var/lib/apt/lists/* || true + rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/locale/* || true + df -h / - name: Install Git run: | @@ -271,10 +277,18 @@ jobs: CCACHE_SLOPPINESS: pch_defines,time_macros run: | source /opt/ros/jazzy/setup.bash + # RelWithDebInfo (not Debug): Debug uses -O0 which produces + # binaries ~2x the size of -O1, and the ASan/UBSan instrumentation + # already inflates them several times over. On github-hosted + # runners (~14 GB /) the linker hits 'No space left on device' on + # PR branches that miss the ccache warm-up. The sanitizer cmake + # module overrides optimisation back to -O1 either way, so this + # only changes debug info size. colcon build --symlink-install \ - --cmake-args -DCMAKE_BUILD_TYPE=Debug -DSANITIZER=asan,ubsan \ + --cmake-args -DCMAKE_BUILD_TYPE=RelWithDebInfo -DSANITIZER=asan,ubsan \ --event-handlers console_direct+ ccache -s + df -h / - name: Extend test timeouts for ASan overhead run: | From d6b9bc19b98c0327450f949d9d1a845af181dcfd Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Thu, 14 May 2026 21:54:19 +0200 Subject: [PATCH 16/16] ci(quality): revert aggressive doc/man/locale cleanup that broke ROS apt install The aggressive disk cleanup in the prior commit rm'd /usr/share/doc, /usr/share/man and /usr/share/locale. dpkg postinstall scripts for ros-jazzy-desktop, openjdk-21, libvtk9 and libpcl-dev fail when those trees are missing - the ASan job aborted after 5 min in 'Set up ROS 2 Jazzy' with dpkg returning 100 on every dependency. Roll those three rm -rf paths back. Keep the RelWithDebInfo build-type change (which was the actual fix for the ASan disk-space failure) and the df -h diagnostic. --- .github/workflows/quality.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index d964cab4..eabac9d6 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -229,13 +229,10 @@ jobs: steps: - name: Free disk space run: | + # Host paths are no-ops inside the container - safe to keep + # for the case where the workflow is run without `container:`. rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc || true apt-get clean - # Container-level cleanup so the ASan build (large debug-info - # binaries) has room on the overlay filesystem. The host / - # is only ~14 GB on github-hosted runners. - rm -rf /var/cache/apt/* /var/lib/apt/lists/* || true - rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/locale/* || true df -h / - name: Install Git