From 0c30acd608125a3223e3241fbd80c672d3192876 Mon Sep 17 00:00:00 2001 From: Walt Askew Date: Mon, 8 Aug 2022 18:26:36 +0000 Subject: [PATCH] Ensure FhirPackages Are Pickle-Able - _json_parser.JsonParser objects are not pickle-able. - Rather than re-use the same JsonParser object to parse all resources from all ResourceCollections, use a new one to parse each resource. - By avoiding JsonParsers as instance variables, ResourceCollections and thus FHirPackages become pickle-able. This is helpful for use cases such as multi-processing. PiperOrigin-RevId: 466098436 --- py/google/fhir/core/utils/fhir_package.py | 30 +++++++++------ .../fhir/core/utils/fhir_package_test_base.py | 38 ++++++++++--------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/py/google/fhir/core/utils/fhir_package.py b/py/google/fhir/core/utils/fhir_package.py index 0e727f31e..5992bb8b9 100644 --- a/py/google/fhir/core/utils/fhir_package.py +++ b/py/google/fhir/core/utils/fhir_package.py @@ -78,7 +78,8 @@ class ResourceCollection(Iterable[_T]): Attributes: archive_file: The zip or tar file path or a function returning a file-like proto_cls: The class of the proto this collection contains. - json_parser: The parser to convert JSON data into FHIR. + handler: The FHIR primitive handler used for resource parsing. + resource_time_zone: The time zone code to parse resource dates into. parsed_resources: A cache of resources which have been parsed into protocol buffers. resource_paths_for_uris: A mapping of URIs to tuples of the path within the @@ -86,12 +87,13 @@ class ResourceCollection(Iterable[_T]): """ def __init__(self, archive_file: PackageSource, proto_cls: Type[_T], - json_parser: _json_parser.JsonParser) -> None: - + handler: primitive_handler.PrimitiveHandler, + resource_time_zone: str) -> None: self.archive_file = archive_file - self.proto_cls = proto_cls - self.json_parser = json_parser + self.handler = handler + self.resource_time_zone = resource_time_zone + self.parsed_resources: Dict[str, _T] = {} self.resource_paths_for_uris: Dict[str, Tuple[str, str]] = {} @@ -156,6 +158,7 @@ def _parse_proto_from_file(self, uri: str, archive_file: BinaryIO, Returns: The protocol buffer for the resource or `None` if it can not be found. """ + json_parser = _json_parser.JsonParser(self.handler, self.resource_time_zone) # Default to zip files for compatiblity. if (not isinstance(archive_file.name, str) or archive_file.name.endswith('.zip')): @@ -183,11 +186,11 @@ def _parse_proto_from_file(self, uri: str, archive_file: BinaryIO, return None else: target = self.proto_cls() - self.json_parser.merge_value(json_value, target) + json_parser.merge_value(json_value, target) return target else: target = self.proto_cls() - self.json_parser.merge_value(json_obj, target) + json_parser.merge_value(json_obj, target) return target def __iter__(self) -> Iterator[_T]: @@ -238,16 +241,19 @@ def load(cls, Raises: ValueError: In the event that the file or contents are invalid. """ - parser = _json_parser.JsonParser(handler, resource_time_zone) collections_per_resource_type = { 'StructureDefinition': - ResourceCollection(archive_file, struct_def_class, parser), + ResourceCollection(archive_file, struct_def_class, handler, + resource_time_zone), 'SearchParameter': - ResourceCollection(archive_file, search_param_class, parser), + ResourceCollection(archive_file, search_param_class, handler, + resource_time_zone), 'CodeSystem': - ResourceCollection(archive_file, code_system_class, parser), + ResourceCollection(archive_file, code_system_class, handler, + resource_time_zone), 'ValueSet': - ResourceCollection(archive_file, value_set_class, parser), + ResourceCollection(archive_file, value_set_class, handler, + resource_time_zone), } with _open_path_or_factory(archive_file) as fd: diff --git a/py/google/fhir/core/utils/fhir_package_test_base.py b/py/google/fhir/core/utils/fhir_package_test_base.py index f345beec2..f685709c6 100644 --- a/py/google/fhir/core/utils/fhir_package_test_base.py +++ b/py/google/fhir/core/utils/fhir_package_test_base.py @@ -19,6 +19,7 @@ import contextlib import io import json +import pickle import tarfile import tempfile from typing import Any, Callable, Iterable, Sequence, Tuple, cast @@ -77,11 +78,6 @@ class FhirPackageTest( def _primitive_handler(self): raise NotImplementedError('Subclasses must implement _primitive_handler') - @property - @abc.abstractmethod - def _parser(self): - raise NotImplementedError('Subclasses must implement _parser') - @property @abc.abstractmethod def _valueset_cls(self): @@ -89,11 +85,13 @@ def _valueset_cls(self): @abc.abstractmethod def _load_package( - self, source: fhir_package.PackageSource) -> fhir_package.FhirPackage: + self, + package_source: fhir_package.PackageSource) -> fhir_package.FhirPackage: raise NotImplementedError('Subclasses must implement _load_package') def empty_collection(self) -> fhir_package.ResourceCollection: - return fhir_package.ResourceCollection('', self._valueset_cls, self._parser) + return fhir_package.ResourceCollection('', self._valueset_cls, + self._primitive_handler, 'Z') @_parameterized_with_package_sources def testFhirPackageLoad_withValidFhirPackage_isReadable( @@ -263,6 +261,15 @@ def testFhirPackageGetResource_forMissingUri_isNone(self): value_sets=self.empty_collection()) self.assertIsNone(package.get_resource('some_uri')) + def testFhirPackage_pickle_isSuccessful(self): + """Ensure FhirPackages are pickle-able.""" + package = fhir_package.FhirPackage( + structure_definitions=self.empty_collection(), + search_parameters=self.empty_collection(), + code_systems=self.empty_collection(), + value_sets=self.empty_collection()) + pickle.dumps(package) + class ResourceCollectionTest(absltest.TestCase, abc.ABC): """Base class for testing ResourceCollections.""" @@ -272,11 +279,6 @@ class ResourceCollectionTest(absltest.TestCase, abc.ABC): def _primitive_handler(self): raise NotImplementedError('Subclasses must implement _primitive_handler') - @property - @abc.abstractmethod - def _parser(self): - raise NotImplementedError('Subclasses must implement _parser') - @property @abc.abstractmethod def _valueset_cls(self): @@ -295,7 +297,7 @@ def testResourceCollection_addGetResource(self): with zipfile_containing(zipfile_contents) as temp_file: collection = fhir_package.ResourceCollection(temp_file.name, self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet') resource = collection.get_resource(uri) @@ -333,7 +335,7 @@ def check_resources(collection): with zipfile_containing(file_contents) as temp_file: collection = fhir_package.ResourceCollection(temp_file.name, self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') collection.add_uri_at_path('http://value-in-a-bundle', 'a_bundle.json', 'Bundle') check_resources(collection) @@ -341,7 +343,7 @@ def check_resources(collection): with npmfile_containing(file_contents) as temp_file: collection = fhir_package.ResourceCollection(temp_file.name, self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') collection.add_uri_at_path('http://value-in-a-bundle', 'package/a_bundle.json', 'Bundle') check_resources(collection) @@ -350,7 +352,7 @@ def testResourceCollection_getMissingResource(self): """Ensure we return None when requesing missing resources.""" collection = fhir_package.ResourceCollection('missing_file.zip', self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') resource = collection.get_resource('missing-uri') self.assertIsNone(resource) @@ -371,7 +373,7 @@ def testResourceCollection_getResourceWithUnexpectedUrl( with zipfile_containing(zipfile_contents) as temp_file: collection = fhir_package.ResourceCollection(temp_file.name, self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet') # The resource unexpectedly has the wrong url @@ -397,7 +399,7 @@ def testResourceCollection_cachedResource(self): with zipfile_containing(zipfile_contents) as temp_file: collection = fhir_package.ResourceCollection(temp_file.name, self._valueset_cls, - self._parser) + self._primitive_handler, 'Z') collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet') # Get the resource for the first time to cache it resource = collection.get_resource(uri)