Skip to content

Commit

Permalink
do not greedily load the contents of auxiliary files
Browse files Browse the repository at this point in the history
As [at]kayoub5 rightly noted, some types of ODX files (usually
`.odx-f`) may feature auxiliary files that might be several gigabytes
in size. What we do instead is to store file handles to these
files. (`ZipFile` thankfully provides that without having to
uncompress the whole archive.) This approach comes with the
disadvantage that these files must not be modified on disk while they
are used by odxtools, but IMO this trade-off is worthwhile.

thanks to [at]kayoub5 for his insistence.

Signed-off-by: Andreas Lauser <[email protected]>
Signed-off-by: Gerrit Ecke <[email protected]>
  • Loading branch information
andlaus committed Jun 11, 2024
1 parent 63958a8 commit 776d740
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 19 deletions.
6 changes: 4 additions & 2 deletions examples/somersaultecu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT
import pathlib
from enum import IntEnum
from io import BytesIO
from itertools import chain
from typing import Any, Dict
from xml.etree import ElementTree
Expand Down Expand Up @@ -2467,10 +2468,11 @@ class SomersaultSID(IntEnum):
database = Database()
database._diag_layer_containers = NamedItemList([somersault_dlc])
database._comparam_subsets = NamedItemList(comparam_subsets)
database.add_auxiliary_file("jobs.py", b"""
database.add_auxiliary_file("jobs.py",
BytesIO(b"""
def compulsory_program():
print("Hello, World")
""")
"""))

# Create ID mapping and resolve references
database.refresh()
16 changes: 9 additions & 7 deletions odxtools/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
from itertools import chain
from pathlib import Path
from typing import List, Optional, OrderedDict
from typing import IO, List, Optional, OrderedDict
from xml.etree import ElementTree
from zipfile import ZipFile

Expand All @@ -27,7 +27,7 @@ def __init__(self,
pdx_zip: Optional[ZipFile] = None,
odx_d_file_name: Optional[str] = None) -> None:
self.model_version: Optional[Version] = None
self.auxiliary_files: OrderedDict[str, bytes] = OrderedDict()
self.auxiliary_files: OrderedDict[str, IO[bytes]] = OrderedDict()

# create an empty database object
self._diag_layer_containers = NamedItemList[DiagLayerContainer]()
Expand All @@ -47,16 +47,18 @@ def add_pdx_file(self, pdx_file_name: str) -> None:
root = ElementTree.parse(pdx_zip.open(zip_member)).getroot()
self._process_xml_tree(root)
elif p.name.lower() != "index.xml":
self.add_auxiliary_file(zip_member, pdx_zip.read(zip_member))
self.add_auxiliary_file(zip_member, pdx_zip.open(zip_member))

def add_odx_file(self, odx_file_name: str) -> None:
self._process_xml_tree(ElementTree.parse(odx_file_name).getroot())

def add_auxiliary_file(self, aux_file_name: str, aux_file_data: Optional[bytes] = None) -> None:
if aux_file_data is None:
aux_file_data = open(aux_file_name, "rb").read()
def add_auxiliary_file(self,
aux_file_name: str,
aux_file_obj: Optional[IO[bytes]] = None) -> None:
if aux_file_obj is None:
aux_file_obj = open(aux_file_name, "rb")

self.auxiliary_files[aux_file_name] = aux_file_data
self.auxiliary_files[aux_file_name] = aux_file_obj

def _process_xml_tree(self, root: ElementTree.Element) -> None:
dlcs: List[DiagLayerContainer] = []
Expand Down
2 changes: 1 addition & 1 deletion odxtools/loadfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def load_directory(dir_name: Union[str, Path]) -> Database:
elif p.suffix.lower().startswith(".odx"):
db.add_odx_file(str(p))
elif p.name.lower() != "index.xml":
db.add_auxiliary_file(p.name, open(str(p), "rb").read())
db.add_auxiliary_file(p.name, open(str(p), "rb"))

db.refresh()
return db
18 changes: 12 additions & 6 deletions odxtools/progcode.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: MIT
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
from xml.etree import ElementTree

from .exceptions import odxrequire
from .exceptions import odxraise, odxrequire
from .odxlink import OdxDocFragment, OdxLinkDatabase, OdxLinkId, OdxLinkRef

if TYPE_CHECKING:
Expand Down Expand Up @@ -58,7 +58,13 @@ def _resolve_odxlinks(self, odxlinks: OdxLinkDatabase) -> None:
def _resolve_snrefs(self, diag_layer: "DiagLayer") -> None:
db = diag_layer._database

self._code = odxrequire(
db.auxiliary_files.get(self.code_file),
f"Reference to auxiliary file '{self.code_file}' "
f"could not be resolved")
aux_file = db.auxiliary_files.get(self.code_file)

if aux_file is None:
odxraise(f"Reference to auxiliary file '{self.code_file}' "
f"could not be resolved")
self._code: bytes = cast(bytes, None)
return

self._code = aux_file.read()
aux_file.seek(0)
4 changes: 2 additions & 2 deletions odxtools/writepdxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def write_pdx_file(
out_file.write(open(in_file_name, "rb").read())

# write the auxiliary files
for output_file_name, data in database.auxiliary_files.items():
for output_file_name, data_file in database.auxiliary_files.items():
file_cdate = datetime.datetime.fromtimestamp(time.time())
creation_date = file_cdate.strftime("%Y-%m-%dT%H:%M:%S")

Expand All @@ -137,7 +137,7 @@ def write_pdx_file(
zf_name = os.path.basename(output_file_name)
with zf.open(zf_name, "w") as out_file:
file_index.append((zf_name, creation_date, mime_type))
out_file.write(data)
out_file.write(data_file.read())

jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
jinja_env.globals["hasattr"] = hasattr
Expand Down
3 changes: 2 additions & 1 deletion tests/test_singleecujob.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import inspect
import os
import unittest
from io import BytesIO
from typing import NamedTuple, cast
from xml.etree import ElementTree

Expand Down Expand Up @@ -468,7 +469,7 @@ def test_resolve_odxlinks(self) -> None:

db = Database()
db.add_auxiliary_file("abc.jar",
b"this is supposed to be a JAR archive, but it isn't (HARR)")
BytesIO(b"this is supposed to be a JAR archive, but it isn't (HARR)"))

dl._resolve_odxlinks(odxlinks)
dl._finalize_init(db, odxlinks)
Expand Down

0 comments on commit 776d740

Please sign in to comment.