diff --git a/fhircraft/fhir/packages/client.py b/fhircraft/fhir/packages/client.py index 5577c1ed..062fa091 100644 --- a/fhircraft/fhir/packages/client.py +++ b/fhircraft/fhir/packages/client.py @@ -263,8 +263,8 @@ def _process_package_tar( Args: tar_file: Opened tar file containing the package """ - results_count = 0 errors = [] + results = [] # First, look for package.json to find dependencies if install_dependencies: @@ -287,12 +287,13 @@ def _process_package_tar( if f"{dependency}@{version}" in self.history: continue try: - self.load_resources_from_package( + dependency_results = self.load_resources_from_package( target_resource, dependency, version, fail_if_exists=False, ) + results.extend(dependency_results) except Exception as e: errors.append( f"Failed to download and load dependency {dependency}: {e}" @@ -301,7 +302,6 @@ def _process_package_tar( errors.append( f"Error processing package.json looking for dependencies: {e}" ) - results = [] for member in tar_file.getmembers(): if not member.isfile(): continue @@ -324,7 +324,6 @@ def _process_package_tar( # Check if it's a StructureDefinition resource if json_data.get("resourceType") == target_resource: results.append(json_data) - results_count += 1 except Exception as e: errors.append(f"Error processing {member.name}: {e}") diff --git a/fhircraft/fhir/resources/definitions/registry.py b/fhircraft/fhir/resources/definitions/registry.py index de0ff9fe..3bd3c8e2 100644 --- a/fhircraft/fhir/resources/definitions/registry.py +++ b/fhircraft/fhir/resources/definitions/registry.py @@ -9,6 +9,7 @@ from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union +import warnings import requests from pydantic import BaseModel @@ -293,13 +294,30 @@ def download_url(url: str) -> Dict[str, Any]: response.raise_for_status() return response.json() - def download_package(self, package_name: str, version: str) -> None: + def download_package( + self, + package_name: str, + version: str, + skip_invalid: bool = False, + include_dependencies: bool = True, + ) -> None: """Download a package from the registry and add its structure definitions to the registry.""" for sd in self._package_client.load_resources_from_package( - "StructureDefinition", package_name, version + "StructureDefinition", + package_name, + version, + install_dependencies=include_dependencies, ): - structure_definition = self._validate_structure_definition(sd) - self.add(structure_definition) + try: + structure_definition = self._validate_structure_definition(sd) + self.add(structure_definition) + except ValueError as e: + if not skip_invalid: + raise e + else: + warnings.warn( + f"Skipping invalid structure definition {sd.get('url', 'unknown')} in package {package_name} version {version}:\n{e}" + ) def set_registry_base_url(self, base_url: str) -> None: """Change the package registry base URL.""" diff --git a/fhircraft/fhir/resources/factory/core.py b/fhircraft/fhir/resources/factory/core.py index 0320de4c..2bafb27e 100644 --- a/fhircraft/fhir/resources/factory/core.py +++ b/fhircraft/fhir/resources/factory/core.py @@ -108,15 +108,25 @@ def build( return self._build(structure_definition, mixins=mixins, mode=mode) - def register_package(self, package_name: str, version: str) -> None: + def register_package( + self, + package_name: str, + version: str, + skip_invalid: bool = False, + include_dependencies: bool = True, + ) -> None: """ Download and register all StructureDefinitions from a FHIR npm package. Args: package_name: Package identifier (e.g. ``"hl7.fhir.us.mcode"``). version: Package version string (e.g. ``"1.0.0"``). + skip_invalid: Whether to skip invalid StructureDefinitions. + include_dependencies: Whether to include dependencies when downloading the package. """ - self.definition_registry.download_package(package_name, version) + self.definition_registry.download_package( + package_name, version, skip_invalid, include_dependencies + ) def register( self, diff --git a/test/test_fhir_packages_client.py b/test/test_fhir_packages_client.py index fbb45f6f..c94f3152 100644 --- a/test/test_fhir_packages_client.py +++ b/test/test_fhir_packages_client.py @@ -1,6 +1,8 @@ """Tests for FHIR Package Registry client.""" +import io import json +import tarfile from unittest.mock import Mock, patch import pytest @@ -13,6 +15,29 @@ ) +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_tar(files: dict) -> tarfile.TarFile: + """Build an in-memory TarFile from a dict of {filename: bytes_or_str}.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + for name, content in files.items(): + if isinstance(content, str): + content = content.encode("utf-8") + info = tarfile.TarInfo(name=name) + info.size = len(content) + tf.addfile(info, io.BytesIO(content)) + buf.seek(0) + return tarfile.open(fileobj=buf, mode="r:gz") + + +def _sd(url: str, resource_type: str = "StructureDefinition") -> str: + return json.dumps({"resourceType": resource_type, "url": url}) + + class TestFHIRPackageRegistryClient: """Test cases for FHIRPackageRegistryClient.""" @@ -194,3 +219,250 @@ def test_package_metadata_minimal(self): assert metadata.dist_tags is None assert metadata.versions is None assert metadata.versions is None + + +# =========================================================================== +# TestProcessPackageTar +# =========================================================================== + + +class TestProcessPackageTar: + """Tests for FHIRPackageRegistryClient._process_package_tar.""" + + def test_returns_sds_from_package(self): + """SDs in the tar file are extracted and returned.""" + sd_url = "http://example.org/StructureDefinition/MyProfile" + tar = _make_tar( + { + "package/StructureDefinition-MyProfile.json": _sd(sd_url), + } + ) + client = FHIRPackageRegistryClient() + results, errors = client._process_package_tar("StructureDefinition", tar) + assert errors == [] + assert len(results) == 1 + assert results[0]["url"] == sd_url + + def test_no_dependencies_when_package_json_absent(self): + """When there is no package.json the dependency loop is skipped.""" + sd_url = "http://example.org/StructureDefinition/MyProfile" + tar = _make_tar( + { + "package/StructureDefinition-MyProfile.json": _sd(sd_url), + } + ) + client = FHIRPackageRegistryClient() + with patch.object(client, "load_resources_from_package") as mock_load: + results, errors = client._process_package_tar("StructureDefinition", tar) + mock_load.assert_not_called() + assert len(results) == 1 + + def test_dependency_sds_included_in_results(self): + """SDs from dependencies are merged into the result list.""" + dep_url = "http://example.org/StructureDefinition/DepProfile" + dep_sd = {"resourceType": "StructureDefinition", "url": dep_url} + + package_json = json.dumps( + {"name": "my.pkg", "version": "1.0.0", "dependencies": {"dep.pkg": "1.0.0"}} + ) + tar = _make_tar({"package/package.json": package_json}) + + client = FHIRPackageRegistryClient() + with patch.object( + client, + "load_resources_from_package", + return_value=[dep_sd], + ) as mock_load: + results, errors = client._process_package_tar("StructureDefinition", tar) + + mock_load.assert_called_once_with( + "StructureDefinition", "dep.pkg", "1.0.0", fail_if_exists=False + ) + assert errors == [] + assert dep_sd in results + + def test_multiple_dependency_sds_all_included(self): + """SDs from multiple dependencies are all present in results.""" + dep_a = {"resourceType": "StructureDefinition", "url": "http://example.org/A"} + dep_b = {"resourceType": "StructureDefinition", "url": "http://example.org/B"} + + package_json = json.dumps( + { + "name": "my.pkg", + "version": "1.0.0", + "dependencies": {"dep.a": "1.0.0", "dep.b": "2.0.0"}, + } + ) + tar = _make_tar({"package/package.json": package_json}) + + client = FHIRPackageRegistryClient() + with patch.object( + client, + "load_resources_from_package", + side_effect=[[dep_a], [dep_b]], + ): + results, errors = client._process_package_tar("StructureDefinition", tar) + + assert errors == [] + assert dep_a in results + assert dep_b in results + + def test_already_loaded_dependency_is_skipped(self): + """A dependency already in history is not re-downloaded.""" + package_json = json.dumps( + {"name": "my.pkg", "version": "1.0.0", "dependencies": {"dep.pkg": "1.0.0"}} + ) + tar = _make_tar({"package/package.json": package_json}) + + client = FHIRPackageRegistryClient() + client.history.add("dep.pkg@1.0.0") # mark as already loaded + + with patch.object(client, "load_resources_from_package") as mock_load: + results, errors = client._process_package_tar("StructureDefinition", tar) + + mock_load.assert_not_called() + assert errors == [] + + def test_dependency_failure_recorded_as_error_not_raised(self): + """A failing dependency download is recorded in errors, not raised.""" + package_json = json.dumps( + {"name": "my.pkg", "version": "1.0.0", "dependencies": {"bad.dep": "9.9.9"}} + ) + tar = _make_tar({"package/package.json": package_json}) + + client = FHIRPackageRegistryClient() + with patch.object( + client, + "load_resources_from_package", + side_effect=PackageNotFoundError("not found"), + ): + results, errors = client._process_package_tar("StructureDefinition", tar) + + assert len(errors) == 1 + assert "bad.dep" in errors[0] + assert results == [] + + def test_install_dependencies_false_skips_dependency_processing(self): + """When install_dependencies=False the dependency block is never entered.""" + package_json = json.dumps( + {"name": "my.pkg", "version": "1.0.0", "dependencies": {"dep.pkg": "1.0.0"}} + ) + tar = _make_tar({"package/package.json": package_json}) + + client = FHIRPackageRegistryClient() + with patch.object(client, "load_resources_from_package") as mock_load: + results, errors = client._process_package_tar( + "StructureDefinition", tar, install_dependencies=False + ) + + mock_load.assert_not_called() + + +# =========================================================================== +# TestLoadResourcesFromPackage – dependency propagation (regression) +# =========================================================================== + + +class TestLoadResourcesFromPackageDependencies: + """Regression tests for the bug where dependency SDs were not returned.""" + + def _make_download_mock(self, pkg_sd_url: str, dep_sd_url: str): + """ + Return a side_effect callable that serves two packages: + - "my.pkg/1.0.0" → tar with package.json declaring dep.pkg + one SD + - "dep.pkg/1.0.0" → tar with one SD + """ + pkg_sd = _sd(pkg_sd_url) + dep_sd = _sd(dep_sd_url) + pkg_package_json = json.dumps( + {"name": "my.pkg", "version": "1.0.0", "dependencies": {"dep.pkg": "1.0.0"}} + ) + dep_package_json = json.dumps( + {"name": "dep.pkg", "version": "1.0.0", "dependencies": {}} + ) + + pkg_tar = _make_tar( + { + "package/package.json": pkg_package_json, + "package/StructureDefinition-PkgProfile.json": pkg_sd, + } + ) + dep_tar = _make_tar( + { + "package/package.json": dep_package_json, + "package/StructureDefinition-DepProfile.json": dep_sd, + } + ) + + tars = {"my.pkg": pkg_tar, "dep.pkg": dep_tar} + + def _download(name, version, extract=False): + tf = tars[name] + assert tf.fileobj is not None, "TarFile must have fileobj for mock to work" + # Re-open the same buffer for each download call + tf.fileobj.seek(0) + return tarfile.open(fileobj=tf.fileobj, mode="r:gz") + + return _download + + @patch("requests.Session.get") + def test_register_package_also_returns_dependency_sds(self, mock_get): + """ + Regression test: load_resources_from_package must include SDs from + transitive dependencies in its return value, not only the top-level package. + """ + pkg_sd_url = "http://example.org/StructureDefinition/PkgProfile" + dep_sd_url = "http://example.org/StructureDefinition/DepProfile" + + # Mock the HTTP layer so download_package calls get real tar content + def _http_side_effect(url, **kwargs): + resp = Mock() + resp.status_code = 200 + if "my.pkg" in url: + pkg_package_json = json.dumps( + { + "name": "my.pkg", + "version": "1.0.0", + "dependencies": {"dep.pkg": "1.0.0"}, + } + ) + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + for name, content in { + "package/package.json": pkg_package_json, + "package/StructureDefinition-PkgProfile.json": _sd(pkg_sd_url), + }.items(): + b = content.encode("utf-8") + info = tarfile.TarInfo(name=name) + info.size = len(b) + tf.addfile(info, io.BytesIO(b)) + resp.content = buf.getvalue() + else: + dep_package_json = json.dumps( + {"name": "dep.pkg", "version": "1.0.0", "dependencies": {}} + ) + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tf: + for name, content in { + "package/package.json": dep_package_json, + "package/StructureDefinition-DepProfile.json": _sd(dep_sd_url), + }.items(): + b = content.encode("utf-8") + info = tarfile.TarInfo(name=name) + info.size = len(b) + tf.addfile(info, io.BytesIO(b)) + resp.content = buf.getvalue() + return resp + + mock_get.side_effect = _http_side_effect + + client = FHIRPackageRegistryClient() + results = client.load_resources_from_package( + "StructureDefinition", "my.pkg", "1.0.0" + ) + urls = [r["url"] for r in results] + + assert pkg_sd_url in urls, "Top-level package SD must be in results" + assert ( + dep_sd_url in urls + ), "Dependency SD must also be in results (regression: was silently dropped)" diff --git a/test/test_fhir_resources_definitions_registry.py b/test/test_fhir_resources_definitions_registry.py index 3000a9b0..ae539c0f 100644 --- a/test/test_fhir_resources_definitions_registry.py +++ b/test/test_fhir_resources_definitions_registry.py @@ -365,10 +365,49 @@ def test_download_package__calls_load_resources_from_package(): reg.download_package("hl7.fhir.r4b.core", "4.3.0") reg._package_client.load_resources_from_package.assert_called_once_with( - "StructureDefinition", "hl7.fhir.r4b.core", "4.3.0" + "StructureDefinition", "hl7.fhir.r4b.core", "4.3.0", install_dependencies=True ) +def test_download_package__include_dependencies_true_passes_flag(): + reg = make_registry() + sd = make_sd() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [{"url": SD_URL}] + + with patch.object(reg, "_validate_structure_definition", return_value=sd): + reg.download_package("pkg", "1.0", include_dependencies=True) + + _, kwargs = reg._package_client.load_resources_from_package.call_args + assert kwargs["install_dependencies"] is True + + +def test_download_package__include_dependencies_false_passes_flag(): + reg = make_registry() + sd = make_sd() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [{"url": SD_URL}] + + with patch.object(reg, "_validate_structure_definition", return_value=sd): + reg.download_package("pkg", "1.0", include_dependencies=False) + + _, kwargs = reg._package_client.load_resources_from_package.call_args + assert kwargs["install_dependencies"] is False + + +def test_download_package__include_dependencies_defaults_to_true(): + reg = make_registry() + sd = make_sd() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [{"url": SD_URL}] + + with patch.object(reg, "_validate_structure_definition", return_value=sd): + reg.download_package("pkg", "1.0") + + _, kwargs = reg._package_client.load_resources_from_package.call_args + assert kwargs["install_dependencies"] is True + + def test_download_package__adds_each_validated_sd(): reg = make_registry() sd_a = make_sd(url="http://example.org/A") @@ -386,6 +425,78 @@ def test_download_package__adds_each_validated_sd(): assert "http://example.org/B" in reg.structure_definitions_by_url +def test_download_package__raises_on_invalid_sd_by_default(): + reg = make_registry() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [ + {"url": "http://example.org/Bad"} + ] + + with patch.object( + reg, + "_validate_structure_definition", + side_effect=ValueError("invalid SD"), + ): + with pytest.raises(ValueError, match="invalid SD"): + reg.download_package("pkg", "1.0") + + +def test_download_package__skip_invalid_true_does_not_raise(): + reg = make_registry() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [ + {"url": "http://example.org/Bad"} + ] + + with patch.object( + reg, + "_validate_structure_definition", + side_effect=ValueError("invalid SD"), + ): + # Should not raise + reg.download_package("pkg", "1.0", skip_invalid=True) + + +def test_download_package__skip_invalid_true_emits_warning(): + reg = make_registry() + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [ + {"url": "http://example.org/Bad"} + ] + + with patch.object( + reg, + "_validate_structure_definition", + side_effect=ValueError("invalid SD"), + ): + with pytest.warns(UserWarning, match="http://example.org/Bad"): + reg.download_package("pkg", "1.0", skip_invalid=True) + + +def test_download_package__skip_invalid_skips_bad_registers_good(): + reg = make_registry() + sd_good = make_sd(url="http://example.org/Good") + reg._package_client = MagicMock() + reg._package_client.load_resources_from_package.return_value = [ + {"url": "http://example.org/Bad"}, + {"url": "http://example.org/Good"}, + ] + + with patch.object( + reg, + "_validate_structure_definition", + side_effect=[ValueError("invalid SD"), sd_good], + ): + import warnings as _warnings + + with _warnings.catch_warnings(): + _warnings.simplefilter("ignore") + reg.download_package("pkg", "1.0", skip_invalid=True) + + assert "http://example.org/Good" in reg.structure_definitions_by_url + assert "http://example.org/Bad" not in reg.structure_definitions_by_url + + # =========================================================================== # StructureDefinitionRegistry.from_dict # =========================================================================== diff --git a/test/test_fhir_resources_factory_core.py b/test/test_fhir_resources_factory_core.py index 2f21ba35..9f80f163 100644 --- a/test/test_fhir_resources_factory_core.py +++ b/test/test_fhir_resources_factory_core.py @@ -445,7 +445,9 @@ def test_register_package__delegates_to_registry(): registry = make_registry() factory = FHIRModelFactory(registry=registry, fhir_release="R4") factory.register_package("hl7.fhir.us.core", "5.0.1") - registry.download_package.assert_called_once_with("hl7.fhir.us.core", "5.0.1") + registry.download_package.assert_called_once_with( + "hl7.fhir.us.core", "5.0.1", False, True + ) # ===========================================================================