Skip to content

Commit

Permalink
Ensure FhirPackages Are Pickle-Able
Browse files Browse the repository at this point in the history
- _json_parser.JsonParser objects are not pickle-able.
- Rather than re-use the same JsonParser object to parse all resources
  from all ResourceCollections, use a new one to parse each resource.
- By avoiding JsonParsers as instance variables, ResourceCollections
  and thus FHirPackages become pickle-able. This is helpful for use
  cases such as multi-processing.

PiperOrigin-RevId: 466098436
  • Loading branch information
waltaskew authored and nickgeorge committed Aug 10, 2022
1 parent 4b1db5a commit 0c30acd
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 30 deletions.
30 changes: 18 additions & 12 deletions py/google/fhir/core/utils/fhir_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,22 @@ class ResourceCollection(Iterable[_T]):
Attributes:
archive_file: The zip or tar file path or a function returning a file-like
proto_cls: The class of the proto this collection contains.
json_parser: The parser to convert JSON data into FHIR.
handler: The FHIR primitive handler used for resource parsing.
resource_time_zone: The time zone code to parse resource dates into.
parsed_resources: A cache of resources which have been parsed into protocol
buffers.
resource_paths_for_uris: A mapping of URIs to tuples of the path within the
zip file containing the resource JSON and the type of that resource.
"""

def __init__(self, archive_file: PackageSource, proto_cls: Type[_T],
json_parser: _json_parser.JsonParser) -> None:

handler: primitive_handler.PrimitiveHandler,
resource_time_zone: str) -> None:
self.archive_file = archive_file

self.proto_cls = proto_cls
self.json_parser = json_parser
self.handler = handler
self.resource_time_zone = resource_time_zone

self.parsed_resources: Dict[str, _T] = {}
self.resource_paths_for_uris: Dict[str, Tuple[str, str]] = {}

Expand Down Expand Up @@ -156,6 +158,7 @@ def _parse_proto_from_file(self, uri: str, archive_file: BinaryIO,
Returns:
The protocol buffer for the resource or `None` if it can not be found.
"""
json_parser = _json_parser.JsonParser(self.handler, self.resource_time_zone)
# Default to zip files for compatiblity.
if (not isinstance(archive_file.name, str) or
archive_file.name.endswith('.zip')):
Expand Down Expand Up @@ -183,11 +186,11 @@ def _parse_proto_from_file(self, uri: str, archive_file: BinaryIO,
return None
else:
target = self.proto_cls()
self.json_parser.merge_value(json_value, target)
json_parser.merge_value(json_value, target)
return target
else:
target = self.proto_cls()
self.json_parser.merge_value(json_obj, target)
json_parser.merge_value(json_obj, target)
return target

def __iter__(self) -> Iterator[_T]:
Expand Down Expand Up @@ -238,16 +241,19 @@ def load(cls,
Raises:
ValueError: In the event that the file or contents are invalid.
"""
parser = _json_parser.JsonParser(handler, resource_time_zone)
collections_per_resource_type = {
'StructureDefinition':
ResourceCollection(archive_file, struct_def_class, parser),
ResourceCollection(archive_file, struct_def_class, handler,
resource_time_zone),
'SearchParameter':
ResourceCollection(archive_file, search_param_class, parser),
ResourceCollection(archive_file, search_param_class, handler,
resource_time_zone),
'CodeSystem':
ResourceCollection(archive_file, code_system_class, parser),
ResourceCollection(archive_file, code_system_class, handler,
resource_time_zone),
'ValueSet':
ResourceCollection(archive_file, value_set_class, parser),
ResourceCollection(archive_file, value_set_class, handler,
resource_time_zone),
}

with _open_path_or_factory(archive_file) as fd:
Expand Down
38 changes: 20 additions & 18 deletions py/google/fhir/core/utils/fhir_package_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import contextlib
import io
import json
import pickle
import tarfile
import tempfile
from typing import Any, Callable, Iterable, Sequence, Tuple, cast
Expand Down Expand Up @@ -77,23 +78,20 @@ class FhirPackageTest(
def _primitive_handler(self):
raise NotImplementedError('Subclasses must implement _primitive_handler')

@property
@abc.abstractmethod
def _parser(self):
raise NotImplementedError('Subclasses must implement _parser')

@property
@abc.abstractmethod
def _valueset_cls(self):
raise NotImplementedError('Subclasses must implement _valueset_cls')

@abc.abstractmethod
def _load_package(
self, source: fhir_package.PackageSource) -> fhir_package.FhirPackage:
self,
package_source: fhir_package.PackageSource) -> fhir_package.FhirPackage:
raise NotImplementedError('Subclasses must implement _load_package')

def empty_collection(self) -> fhir_package.ResourceCollection:
return fhir_package.ResourceCollection('', self._valueset_cls, self._parser)
return fhir_package.ResourceCollection('', self._valueset_cls,
self._primitive_handler, 'Z')

@_parameterized_with_package_sources
def testFhirPackageLoad_withValidFhirPackage_isReadable(
Expand Down Expand Up @@ -263,6 +261,15 @@ def testFhirPackageGetResource_forMissingUri_isNone(self):
value_sets=self.empty_collection())
self.assertIsNone(package.get_resource('some_uri'))

def testFhirPackage_pickle_isSuccessful(self):
"""Ensure FhirPackages are pickle-able."""
package = fhir_package.FhirPackage(
structure_definitions=self.empty_collection(),
search_parameters=self.empty_collection(),
code_systems=self.empty_collection(),
value_sets=self.empty_collection())
pickle.dumps(package)


class ResourceCollectionTest(absltest.TestCase, abc.ABC):
"""Base class for testing ResourceCollections."""
Expand All @@ -272,11 +279,6 @@ class ResourceCollectionTest(absltest.TestCase, abc.ABC):
def _primitive_handler(self):
raise NotImplementedError('Subclasses must implement _primitive_handler')

@property
@abc.abstractmethod
def _parser(self):
raise NotImplementedError('Subclasses must implement _parser')

@property
@abc.abstractmethod
def _valueset_cls(self):
Expand All @@ -295,7 +297,7 @@ def testResourceCollection_addGetResource(self):
with zipfile_containing(zipfile_contents) as temp_file:
collection = fhir_package.ResourceCollection(temp_file.name,
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet')
resource = collection.get_resource(uri)

Expand Down Expand Up @@ -333,15 +335,15 @@ def check_resources(collection):
with zipfile_containing(file_contents) as temp_file:
collection = fhir_package.ResourceCollection(temp_file.name,
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
collection.add_uri_at_path('http://value-in-a-bundle', 'a_bundle.json',
'Bundle')
check_resources(collection)

with npmfile_containing(file_contents) as temp_file:
collection = fhir_package.ResourceCollection(temp_file.name,
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
collection.add_uri_at_path('http://value-in-a-bundle',
'package/a_bundle.json', 'Bundle')
check_resources(collection)
Expand All @@ -350,7 +352,7 @@ def testResourceCollection_getMissingResource(self):
"""Ensure we return None when requesing missing resources."""
collection = fhir_package.ResourceCollection('missing_file.zip',
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
resource = collection.get_resource('missing-uri')

self.assertIsNone(resource)
Expand All @@ -371,7 +373,7 @@ def testResourceCollection_getResourceWithUnexpectedUrl(
with zipfile_containing(zipfile_contents) as temp_file:
collection = fhir_package.ResourceCollection(temp_file.name,
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet')

# The resource unexpectedly has the wrong url
Expand All @@ -397,7 +399,7 @@ def testResourceCollection_cachedResource(self):
with zipfile_containing(zipfile_contents) as temp_file:
collection = fhir_package.ResourceCollection(temp_file.name,
self._valueset_cls,
self._parser)
self._primitive_handler, 'Z')
collection.add_uri_at_path(uri, 'a_value_set.json', 'ValueSet')
# Get the resource for the first time to cache it
resource = collection.get_resource(uri)
Expand Down

0 comments on commit 0c30acd

Please sign in to comment.