From 674364e3f6174f2fd23922fe2d6c36d2bf3c930b Mon Sep 17 00:00:00 2001 From: Scott Dixon Date: Mon, 15 Jul 2024 15:56:32 -0700 Subject: [PATCH] Allow relative target paths Fix for issue #109. This relaxes some logic that required existing paths before creating a DSDLFile abstraction while adding a requirement that ReadableDSDLFile(s) are created with files that exist (i.e. it can't be "readable" if it doesn't exist). At the same time, we clarified (and fixed) the difference between the `root_namespace_directories_or_names` and `lookup_directories arguments` of read_files. Our new guidance is to dis-use `lookup_directories` unless there is a need to separate target lookup paths from dependent type look up paths. This was an unstated design goal that I forgot about and subsequently mis-implemented. This does change the behaviour of the API slightly which is why I've bumped the minor version instead of just the patch. --- pydsdl/__init__.py | 2 +- pydsdl/_dsdl.py | 10 ++- pydsdl/_dsdl_definition.py | 165 +++++++++++++++++++++++++----------- pydsdl/_namespace.py | 127 ++++++++++++++++++++++++--- pydsdl/_namespace_reader.py | 4 +- 5 files changed, 239 insertions(+), 69 deletions(-) diff --git a/pydsdl/__init__.py b/pydsdl/__init__.py index e8049de..8531978 100644 --- a/pydsdl/__init__.py +++ b/pydsdl/__init__.py @@ -7,7 +7,7 @@ import sys as _sys from pathlib import Path as _Path -__version__ = "1.21.1" +__version__ = "1.22.0" __version_info__ = tuple(map(int, __version__.split(".")[:3])) __license__ = "MIT" __author__ = "OpenCyphal" diff --git a/pydsdl/_dsdl.py b/pydsdl/_dsdl.py index 9cc007a..f8d7c98 100644 --- a/pydsdl/_dsdl.py +++ b/pydsdl/_dsdl.py @@ -5,7 +5,7 @@ from __future__ import annotations from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Callable, Iterable, TypeVar, List, Tuple +from typing import Any, Callable, Iterable, List, Tuple, TypeVar from ._serializable import CompositeType, Version @@ -104,6 +104,14 @@ class ReadableDSDLFile(DSDLFile): A DSDL file that can construct a composite type from its contents. """ + @property + @abstractmethod + def file_path(self) -> Path: + """ + Path to an existing DSDL file on the filesystem. + """ + raise NotImplementedError() + @abstractmethod def read( self, diff --git a/pydsdl/_dsdl_definition.py b/pydsdl/_dsdl_definition.py index d1f15b7..a85e6b4 100644 --- a/pydsdl/_dsdl_definition.py +++ b/pydsdl/_dsdl_definition.py @@ -45,6 +45,7 @@ class DSDLDefinition(ReadableDSDLFile): :param file_path: The path to the DSDL file. :param root_namespace_path: The path to the root namespace directory. `file_path` must be a descendant of this path. See `from_first_in` for a more flexible way to create a DSDLDefinition object. + :raises InvalidDefinitionError: If file_path does not exist. """ @classmethod @@ -56,20 +57,37 @@ def _infer_path_to_root_from_first_found(cls, dsdl_path: Path, valid_dsdl_roots: if valid_dsdl_roots is None: raise ValueError("valid_dsdl_roots was None") - if dsdl_path.is_absolute() and len(valid_dsdl_roots) == 0: - raise PathInferenceError( - f"dsdl_path ({dsdl_path}) is absolute and the provided valid root names are empty. The DSDL root of " - "an absolute path cannot be inferred without this information.", - dsdl_path, - valid_dsdl_roots, - ) - + # INFERENCE 1: The easiest inference is when the target path is relative to the current working directory and + # the root is a direct child folder. In this case we allow targets to be specified as simple, relative paths + # where we infer the root from the first part of each path. if len(valid_dsdl_roots) == 0: - # if we have no valid roots we can only infer the root of the path. We require the path to be relative - # to avoid accidental inferences given that dsdl file trees starting from a filesystem root are rare. - return Path(dsdl_path.parts[0]) - - # INFERENCE 1: The strongest inference is when the path is relative to a known root. + # if we have no valid roots we can only infer the root of the path. + if dsdl_path.is_absolute(): + # but if the path is absolute we refuse to infer the root as this would cause us to search the entire + # filesystem for DSDL files and it's almost certainly wrong. + raise PathInferenceError( + f"No valid roots provided for absolute path {str(dsdl_path)}. Unable to infer root without " + "more information.", + dsdl_path, + valid_dsdl_roots, + ) + else: + # if the path is relative we'll assume the root is the top-most folder in the path. + directly_inferred = Path(dsdl_path.parts[0]) + try: + directly_inferred.resolve(strict=True) + except FileNotFoundError: + raise PathInferenceError( + f"No valid root found in path {str(dsdl_path)} and the inferred root {str(directly_inferred)} " + "does not exist. You either need to change your working directory to the folder that contains " + "this root folder or provide a valid root path.", + dsdl_path, + valid_dsdl_roots, + ) + return directly_inferred + + # INFERENCE 2: The next easiest inference is when the target path is relative to a known dsdl root. These + # operations should work with pure paths and not require filesystem access. resolved_dsdl_path = dsdl_path.resolve(strict=False) if dsdl_path.is_absolute() else None for path_to_root in valid_dsdl_roots: # First we try the paths as-is... @@ -89,7 +107,27 @@ def _infer_path_to_root_from_first_found(cls, dsdl_path: Path, valid_dsdl_roots: else: return path_to_root_resolved - # INFERENCE 2: A weaker, but valid inference is when the path is a child of a known root folder name. + # INFERENCE 3: If the target is relative then we can try to find a valid root by looking for the file in the + # root directories. This is a stronger inference than the previous one because it requires the file to exist + # but we do it second because it reads the filesystem. + if not dsdl_path.is_absolute(): + for path_to_root in valid_dsdl_roots: + path_to_root_parent = path_to_root + while path_to_root_parent != path_to_root_parent.parent: + # Weld together and check only if the root's last part is the same name as the target's first part. + # yes: + # path/to/root + root/then/Type.1.0.dsdl <- /root == root/ + # no: + # path/to/not_root + root/then/Type.1.0.dsdl <- /not_root != root/ + if ( + path_to_root_parent.parts[-1] == dsdl_path.parts[0] + and (path_to_root_parent.parent / dsdl_path).exists() + ): + return path_to_root_parent + path_to_root_parent = path_to_root_parent.parent + + # INFERENCE 4: A weaker, but valid inference is when the target path is a child of a known root folder name. + # This is only allowed if dsdl roots are top-level namespace names and not paths. root_parts = [x.parts[-1] for x in valid_dsdl_roots if len(x.parts) == 1] parts = list(dsdl_path.parent.parts) for i, part in list(enumerate(parts)): @@ -102,9 +140,11 @@ def _infer_path_to_root_from_first_found(cls, dsdl_path: Path, valid_dsdl_roots: def from_first_in(cls: Type["DSDLDefinition"], dsdl_path: Path, valid_dsdl_roots: list[Path]) -> "DSDLDefinition": """ Creates a DSDLDefinition object by inferring the path to the namespace root of a DSDL file given a set - of valid roots. The logic used prefers an instance of dsdl_path found to exist under a valid root but - will degrade to pure-path string matching if no file is found. Because this logic uses the first root path - that passes one of these two inferences the order of the valid_dsdl_roots list matters. + of valid roots and, if the dsdl path is relative, resolving the dsdl path relative to said roots. The logic used + prefers an instance of `dsdl_path` found to exist under a valid root but will degrade to pure-path string + matching if no file is found (If this does not yield a valid path to an existing dsdl file an exception is + raised). Because this logic uses the first root path that passes one of these two inferences the order of the + valid_dsdl_roots list matters. :param dsdl_path: The path to the alleged DSDL file. :param valid_dsdl_roots: The ordered set of valid root names or paths under which the type must reside. @@ -112,14 +152,26 @@ def from_first_in(cls: Type["DSDLDefinition"], dsdl_path: Path, valid_dsdl_roots as the caller is expected to provide a correct set of paths. :return A new DSDLDefinition object :raises PathInferenceError: If the namespace root cannot be inferred from the provided information. + :raises InvalidDefinitionError: If the file does not exist. """ - return cls(dsdl_path, cls._infer_path_to_root_from_first_found(dsdl_path, valid_dsdl_roots)) + root_path = cls._infer_path_to_root_from_first_found(dsdl_path, valid_dsdl_roots) + if not dsdl_path.is_absolute(): + dsdl_path_resolved = (root_path.parent / dsdl_path).resolve(strict=False) + else: + dsdl_path_resolved = dsdl_path.resolve(strict=False) + return cls(dsdl_path_resolved, root_path) def __init__(self, file_path: Path, root_namespace_path: Path): """ """ # Normalizing the path and reading the definition text self._file_path = Path(file_path) del file_path + + if not self._file_path.exists(): + raise InvalidDefinitionError( + "Attempt to construct ReadableDSDLFile object for file that doesn't exist.", self._file_path + ) + self._root_namespace_path = Path(root_namespace_path) del root_namespace_path self._text: str | None = None @@ -171,8 +223,12 @@ def __init__(self, file_path: Path, root_namespace_path: Path): self._cached_type: CompositeType | None = None # +-----------------------------------------------------------------------+ - # | ReadableDSDLFile :: INTERFACE | + # | ReadableDSDLFile :: INTERFACE | # +-----------------------------------------------------------------------+ + @property + def file_path(self) -> Path: + return self._file_path + def read( self, lookup_definitions: Iterable[ReadableDSDLFile], @@ -185,9 +241,6 @@ def read( _logger.debug("%s: Cache hit", log_prefix) return self._cached_type - if not self._file_path.exists(): - raise InvalidDefinitionError("Attempt to read DSDL file that doesn't exist.", self._file_path) - started_at = time.monotonic() # Remove the target definition from the lookup list in order to prevent @@ -274,10 +327,6 @@ def fixed_port_id(self) -> int | None: def has_fixed_port_id(self) -> bool: return self.fixed_port_id is not None - @property - def file_path(self) -> Path: - return self._file_path - @property def root_namespace_path(self) -> Path: return self._root_namespace_path @@ -298,12 +347,15 @@ def __eq__(self, other: object) -> bool: return NotImplemented # pragma: no cover def __str__(self) -> str: - return "DSDLDefinition(full_name=%r, version=%r, fixed_port_id=%r, file_path=%s)" % ( - self.full_name, - self.version, - self.fixed_port_id, - self.file_path, - ) + try: + return "DSDLDefinition(full_name=%r, version=%r, fixed_port_id=%r, file_path=%s)" % ( + self.full_name, + self.version, + self.fixed_port_id, + self.file_path, + ) + except AttributeError: # pragma: no cover + return "DSDLDefinition(UNINITIALIZED)" __repr__ = __str__ @@ -315,13 +367,8 @@ def _unittest_dsdl_definition_read_non_existent() -> None: from pytest import raises as expect_raises target = Path("root", "ns", "Target.1.1.dsdl") - target_definition = DSDLDefinition(target, target.parent) - - def print_output(line_number: int, text: str) -> None: # pragma: no cover - _ = line_number, text - with expect_raises(InvalidDefinitionError): - target_definition.read([], [], print_output, True) + _ = DSDLDefinition(target, target.parent) def _unittest_dsdl_definition_read_text(temp_dsdl_factory) -> None: # type: ignore @@ -354,16 +401,11 @@ def _unittest_type_from_path_inference() -> None: # case the method assumes that the relative path is the correct and complete namespace of the type: # relative path - root = DSDLDefinition._infer_path_to_root_from_first_found(Path("uavcan/foo/bar/435.baz.1.0.dsdl"), []) + root = DSDLDefinition._infer_path_to_root_from_first_found( + Path("uavcan/foo/bar/435.baz.1.0.dsdl"), [Path("uavcan")] + ) assert root == Path("uavcan") - # The root namespace is not inferred in an absolute path without additional data: - - with expect_raises(PathInferenceError): - _ = DSDLDefinition._infer_path_to_root_from_first_found( - Path("/repo/uavcan/foo/bar/435.baz.1.0.dsdl").resolve(), [] - ) - with expect_raises(ValueError): _ = DSDLDefinition._infer_path_to_root_from_first_found( Path("/repo/uavcan/foo/bar/435.baz.1.0.dsdl").resolve(), None # type: ignore @@ -458,8 +500,31 @@ def _unittest_type_from_path_inference() -> None: assert root == Path("repo/uavcan") -def _unittest_from_first_in() -> None: - dsdl_def = DSDLDefinition.from_first_in( - Path("/repo/uavcan/foo/bar/435.baz.1.0.dsdl").resolve(), [Path("/repo/uavcan/foo/..").resolve()] - ) +def _unittest_type_from_path_inference_edge_case(temp_dsdl_factory) -> None: # type: ignore + """ + Edge case where we target a file where the namespace is under the root path. + """ + # pylint: disable=protected-access + + from pytest import raises as expect_raises + import os + + target_path = Path("dsdl_root/Type.1.0.dsdl") + target_file = temp_dsdl_factory.new_file(target_path, "@sealed").resolve() + expected_root_parent = target_file.parent.parent + with expect_raises(PathInferenceError): + _ = DSDLDefinition._infer_path_to_root_from_first_found(target_file, []) + + old_cwd = os.getcwd() + os.chdir(expected_root_parent) + try: + root = DSDLDefinition._infer_path_to_root_from_first_found(target_path, []) + assert root.parent.resolve() == expected_root_parent + finally: + os.chdir(old_cwd) + + +def _unittest_from_first_in(temp_dsdl_factory) -> None: # type: ignore + dsdl_file = temp_dsdl_factory.new_file(Path("repo/uavcan/foo/bar/435.baz.1.0.dsdl"), "@sealed") + dsdl_def = DSDLDefinition.from_first_in(dsdl_file.resolve(), [(dsdl_file.parent.parent / "..")]) assert dsdl_def.full_name == "uavcan.foo.bar.baz" diff --git a/pydsdl/_namespace.py b/pydsdl/_namespace.py index 725f8d5..02d8306 100644 --- a/pydsdl/_namespace.py +++ b/pydsdl/_namespace.py @@ -6,6 +6,7 @@ from __future__ import annotations import collections +import itertools import logging from itertools import product, repeat from pathlib import Path @@ -147,10 +148,10 @@ def read_files( It reads all DSDL definitions from the specified ``dsdl_files`` and produces the annotated AST for these types and the transitive closure of the types they depend on. - :param dsdl_files: A list of paths to dsdl files to parse. + :param Any dsdl_files: A list of paths to dsdl files to parse. - :param root_namespace_directories_or_names: This can be a set of names of root namespaces or relative paths to - root namespaces. All ``dsdl_files`` provided must be under one of these roots. For example, given: + :param root_namespace_directories_or_names: This can be a set of names of root namespaces or paths to root + namespaces. All ``dsdl_files`` provided must be under one of these roots. For example, given: .. code-block:: python @@ -166,18 +167,59 @@ def read_files( .. code-block:: python root_namespace_directories_or_names = ["animals", "plants"] + # or + root_namespace_directories_or_names = [ + Path("workspace/project/types/animals"), + Path("workspace/project/types/plants") + ] + # or + root_namespace_directories_or_names = [ + Path("animals"), + Path("workspace/project/types/plants") + ] + # etc + + + Any target path not found on the filesystem must be located under a root namespace directory. For example, + given a set of relative target files like this: + + .. code-block:: python + + dsdl_files = [ + Path("animals/felines/Tabby.1.0.dsdl"), + Path("animals/canines/Boxer.1.0.dsdl"), + Path("plants/trees/DouglasFir.1.0.dsdl") + ] + + then this argument must include paths under which the target files can be found. For example: + + .. code-block:: python root_namespace_directories_or_names = [ Path("workspace/project/types/animals"), Path("workspace/project/types/plants") ] + Note that relative paths shown here would only resolve correctly if the current working directory is set to + the root of the workspace (this function does not search system paths for the target files). If the target files + are located in a different folder the paths must be absolute. For example: - :param lookup_directories: List of other namespace directories containing data type definitions that are - referred to from the target dsdl files. For example, if you are reading vendor-specific types, - the list of lookup directories should always include a path to the standard root namespace ``uavcan``, - otherwise the types defined in the vendor-specific namespace won't be able to use data types from the - standard namespace. + .. code-block:: python + + root_namespace_directories_or_names = [ + Path("/path/to/workspace/project/types/animals"), + Path("/path/to/workspace/project/types/plants") + ] + + When reading vendor-specific types, the list of lookup directories should always include a path to the standard + root namespace ``uavcan``, otherwise the types defined in the vendor-specific namespace won't be able to use + data types from the standard namespace. + + :param lookup_directories: List of namespace directories containing data type definitions that are referred to from + the target dsdl files. Callers should prefer combining the lookup directories with the root namespace + directories unless certain paths should be excluded when looking for target types. That is, only + ``root_namespace_directories_or_names`` is used to find the target types but both this list and + ``root_namespace_directories_or_names`` are used to find the dependent types. :param print_output_handler: If provided, this callable will be invoked when a ``@print`` directive is encountered or when the frontend needs to emit a diagnostic; @@ -201,9 +243,10 @@ def read_files( :class:`ValueError`/:class:`TypeError` if the arguments are invalid. """ # Normalize paths and remove duplicates. Resolve symlinks to avoid ambiguities. + normal_root_namespace_directories_or_names = normalize_paths_argument_to_list(root_namespace_directories_or_names) target_dsdl_definitions = _construct_dsdl_definitions_from_files( normalize_paths_argument_to_list(dsdl_files), - normalize_paths_argument_to_list(root_namespace_directories_or_names), + normal_root_namespace_directories_or_names, ) if len(target_dsdl_definitions) == 0: _logger.info("No DSDL files found in the specified directories") @@ -217,7 +260,7 @@ def read_files( root_namespaces = {f.root_namespace_path.resolve() for f in target_dsdl_definitions} lookup_directories_path_list = _construct_lookup_directories_path_list( - root_namespaces, + itertools.chain(root_namespaces, filter(lambda x: x.exists(), normal_root_namespace_directories_or_names)), normalize_paths_argument_to_list(lookup_directories), True, ) @@ -334,15 +377,14 @@ def _construct_dsdl_definitions_from_files( """ """ output = set() # type: set[ReadableDSDLFile] for fp in dsdl_files: - resolved_fp = fp.resolve(strict=False) - if resolved_fp.suffix == DSDL_FILE_SUFFIX_LEGACY: + if fp.suffix == DSDL_FILE_SUFFIX_LEGACY: _logger.warning( "File uses deprecated extension %r, please rename to use %r: %s", DSDL_FILE_SUFFIX_LEGACY, DSDL_FILE_SUFFIX, - resolved_fp, + fp, ) - output.add(_dsdl_definition.DSDLDefinition.from_first_in(resolved_fp, list(valid_roots))) + output.add(_dsdl_definition.DSDLDefinition.from_first_in(fp, list(valid_roots))) return dsdl_file_sort(output) @@ -841,3 +883,60 @@ def _unittest_issue_104(temp_dsdl_factory) -> None: # type: ignore with raises(DataTypeNameCollisionError): read_files(thing_file2, file_at_root.parent, file_at_root.parent) + + +def _unittest_issue_109(temp_dsdl_factory) -> None: # type: ignore + """Allow target paths to be relative""" + + # pylint: disable=too-many-locals + + from pytest import raises + from ._dsdl_definition import PathInferenceError + + odin_1_0 = Path("gods/norse/Odin.1.0.dsdl") + huginn_1_0 = Path("familiars/norse/Huginn.1.0.dsdl") + muninn_1_0 = Path("familiars/norse/Muninn.1.0.dsdl") + + odin_file = temp_dsdl_factory.new_file( + odin_1_0, "@sealed\nfamiliars.norse.Huginn.1.0 huginn\nfamiliars.norse.Muninn.1.0 muninn\n" + ) + huginn_file = temp_dsdl_factory.new_file(huginn_1_0, "@sealed\n") + _ = temp_dsdl_factory.new_file(muninn_1_0, "@sealed\n") + + # relative path for target / absolute path for root namespace + direct1, transitive1 = read_files([odin_1_0], [odin_file.parent.parent, huginn_file.parent.parent]) + + assert len(direct1) == 1 + assert len(transitive1) == 2 + + assert direct1[0].source_file_path == odin_file + + # here we use an absolute path to the target and provide a root namespace name for it. + direct2, transitive2 = read_files([odin_file], ["gods"], [huginn_file.parent.parent]) + + assert len(direct2) == 1 + assert len(transitive2) == 2 + + assert direct2[0].source_file_path == odin_file + + # Once more to show that it works if the name and lookup is in the second argument + direct3, transitive3 = read_files([odin_file], ["gods", huginn_file.parent.parent]) + + assert len(direct3) == 1 + assert len(transitive3) == 2 + + assert direct3[0].source_file_path == odin_file + + # Next show that the lookup paths cannot be used to find targets + with raises(PathInferenceError): + _ = read_files([odin_1_0], [], [odin_file.parent.parent, huginn_file.parent.parent]) + + # Finally, we'll make sure that we don't confuse roots when two targets are rooted in the same place + # Once more to show that it works if the name and lookup is in the second argument + direct4, transitive4 = read_files([odin_1_0], [huginn_file.parent.parent, odin_file.parent.parent]) + + assert len(direct4) == 1 + assert len(transitive4) == 2 + + assert direct4[0].source_file_path == odin_file + assert direct4[0].root_namespace == "gods" diff --git a/pydsdl/_namespace_reader.py b/pydsdl/_namespace_reader.py index adb5130..fd264e1 100644 --- a/pydsdl/_namespace_reader.py +++ b/pydsdl/_namespace_reader.py @@ -201,9 +201,7 @@ def _unittest_namespace_reader_read_definitions_multiple_no_load(temp_dsdl_facto temp_dsdl_factory.new_file(Path("root", "ns", "Tacoma.1.0.dsdl"), "@sealed"), temp_dsdl_factory.new_file(Path("root", "ns", "Rainer.1.0.dsdl"), "@sealed"), temp_dsdl_factory.new_file(Path("root", "ns", "Baker.1.0.dsdl"), "@sealed"), - Path( - "root", "ns", "Shasta.1.0.dsdl" - ), # since this isn't in the transitive closure of target dependencies it will + temp_dsdl_factory.new_file(Path("root", "ns", "Shasta.1.0.dsdl"), "@sealed"), # never be read thus it will not be an error that it does not exist. ]