From 2b5447bca3b85141aedc7676f83a19ea2f030739 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 14 Dec 2023 02:34:22 +0900 Subject: [PATCH] feat(whl_library): generate platform-specific dependency closures (#1593) Before this change, the dependency closures would be influenced by the host-python interpreter, this removes the influence by detecting the platforms against which the `Requires-Dist` wheel metadata is evaluated. This functionality can be enabled via `experimental_target_platforms` attribute to the `pip.parse` extension and is showcased in the `bzlmod` example. The same attribute is also supported on the legacy `pip_parse` repository rule. The detection works in the following way: - Check if the python wheel is platform specific or cross-platform (i.e., ends with `any.whl`), if it is then platform-specific dependencies are generated, which will go through a `select` statement. - If it is platform specific, then parse the platform_tag and evaluate the `Requires-Dist` markers assuming the target platform rather than the host platform. NOTE: The `whl` `METADATA` is now being parsed using the `packaging` Python package instead of `pkg_resources` from `setuptools`. Fixes #1591 --------- Co-authored-by: Richard Levasseur --- CHANGELOG.md | 7 + examples/bzlmod/MODULE.bazel | 14 + python/pip_install/pip_repository.bzl | 34 +- .../generate_whl_library_build_bazel.bzl | 81 +++- .../tools/wheel_installer/BUILD.bazel | 13 + .../tools/wheel_installer/arguments.py | 28 +- .../tools/wheel_installer/arguments_test.py | 14 +- .../tools/wheel_installer/wheel.py | 425 +++++++++++++++++- .../tools/wheel_installer/wheel_installer.py | 16 +- .../wheel_installer/wheel_installer_test.py | 54 +-- .../tools/wheel_installer/wheel_test.py | 235 ++++++++++ python/private/bzlmod/pip.bzl | 1 + python/private/text_util.bzl | 14 +- .../generate_build_bazel_tests.bzl | 81 +++- 14 files changed, 938 insertions(+), 79 deletions(-) create mode 100644 python/pip_install/tools/wheel_installer/wheel_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1937fe549d..4c761ed750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,13 @@ A brief description of the categories of changes: * (gazelle) The gazelle plugin helper was not working with Python toolchains 3.11 and above due to a bug in the helper components not being on PYTHONPATH. +* (pip_parse) The repositories created by `whl_library` can now parse the `whl` + METADATA and generate dependency closures irrespective of the host platform + the generation is executed on. This can be turned on by supplying + `experimental_target_platforms = ["all"]` to the `pip_parse` or the `bzlmod` + equivalent. This may help in cases where fetching wheels for a different + platform using `download_only = True` feature. + [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 ## [0.27.0] - 2023-11-16 diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index 44d686e3dc..9ce84ee78b 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -98,6 +98,13 @@ pip.parse( "sphinxcontrib-serializinghtml", ], }, + # You can use one of the values below to specify the target platform + # to generate the dependency graph for. + experimental_target_platforms = [ + "all", + "linux_*", + "host", + ], hub_name = "pip", python_version = "3.9", requirements_lock = "//:requirements_lock_3_9.txt", @@ -121,6 +128,13 @@ pip.parse( "sphinxcontrib-serializinghtml", ], }, + # You can use one of the values below to specify the target platform + # to generate the dependency graph for. + experimental_target_platforms = [ + "all", + "linux_*", + "host", + ], hub_name = "pip", python_version = "3.10", requirements_lock = "//:requirements_lock_3_10.txt", diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 07e3353c77..bf37977f0c 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -345,6 +345,8 @@ def _pip_repository_impl(rctx): if rctx.attr.python_interpreter_target: config["python_interpreter_target"] = str(rctx.attr.python_interpreter_target) + if rctx.attr.experimental_target_platforms: + config["experimental_target_platforms"] = rctx.attr.experimental_target_platforms if rctx.attr.incompatible_generate_aliases: macro_tmpl = "@%s//{}:{}" % rctx.attr.name @@ -472,6 +474,30 @@ Warning: If a dependency participates in multiple cycles, all of those cycles must be collapsed down to one. For instance `a <-> b` and `a <-> c` cannot be listed as two separate cycles. +""", + ), + "experimental_target_platforms": attr.string_list( + default = [], + doc = """\ +A list of platforms that we will generate the conditional dependency graph for +cross platform wheels by parsing the wheel metadata. This will generate the +correct dependencies for packages like `sphinx` or `pylint`, which include +`colorama` when installed and used on Windows platforms. + +An empty list means falling back to the legacy behaviour where the host +platform is the target platform. + +WARNING: It may not work as expected in cases where the python interpreter +implementation that is being used at runtime is different between different platforms. +This has been tested for CPython only. + +Special values: `all` (for generating deps for all platforms), `host` (for +generating deps for the host platform only). `linux_*` and other `_*` values. +In the future we plan to set `all` as the default to this attribute. + +For specific target platforms use values of the form `_` where `` +is one of `linux`, `osx`, `windows` and arch is one of `x86_64`, `x86_32`, +`aarch64`, `s390x` and `ppc64le`. """, ), "extra_pip_args": attr.string_list( @@ -713,7 +739,10 @@ def _whl_library_impl(rctx): ) result = rctx.execute( - args + ["--whl-file", whl_path], + args + [ + "--whl-file", + whl_path, + ] + ["--platform={}".format(p) for p in rctx.attr.experimental_target_platforms], environment = environment, quiet = rctx.attr.quiet, timeout = rctx.attr.timeout, @@ -749,6 +778,7 @@ def _whl_library_impl(rctx): repo_prefix = rctx.attr.repo_prefix, whl_name = whl_path.basename, dependencies = metadata["deps"], + dependencies_by_platform = metadata["deps_by_platform"], group_name = rctx.attr.group_name, group_deps = rctx.attr.group_deps, data_exclude = rctx.attr.pip_data_exclude, @@ -815,7 +845,7 @@ whl_library_attrs = { doc = "Python requirement string describing the package to make available", ), "whl_patches": attr.label_keyed_string_dict( - doc = """"a label-keyed-string dict that has + doc = """a label-keyed-string dict that has json.encode(struct([whl_file], patch_strip]) as values. This is to maintain flexibility and correct bzlmod extension interface until we have a better way to define whl_library and move whl diff --git a/python/pip_install/private/generate_whl_library_build_bazel.bzl b/python/pip_install/private/generate_whl_library_build_bazel.bzl index 6d0f167f02..568b00e4df 100644 --- a/python/pip_install/private/generate_whl_library_build_bazel.bzl +++ b/python/pip_install/private/generate_whl_library_build_bazel.bzl @@ -25,6 +25,7 @@ load( "WHEEL_FILE_PUBLIC_LABEL", ) load("//python/private:normalize_name.bzl", "normalize_name") +load("//python/private:text_util.bzl", "render") _COPY_FILE_TEMPLATE = """\ copy_file( @@ -101,11 +102,36 @@ alias( ) """ +def _render_list_and_select(deps, deps_by_platform, tmpl): + deps = render.list([tmpl.format(d) for d in deps]) + + if not deps_by_platform: + return deps + + deps_by_platform = { + p if p.startswith("@") else ":is_" + p: [ + tmpl.format(d) + for d in deps + ] + for p, deps in deps_by_platform.items() + } + + # Add the default, which means that we will be just using the dependencies in + # `deps` for platforms that are not handled in a special way by the packages + deps_by_platform["//conditions:default"] = [] + deps_by_platform = render.select(deps_by_platform, value_repr = render.list) + + if deps == "[]": + return deps_by_platform + else: + return "{} + {}".format(deps, deps_by_platform) + def generate_whl_library_build_bazel( *, repo_prefix, whl_name, dependencies, + dependencies_by_platform, data_exclude, tags, entry_points, @@ -118,6 +144,7 @@ def generate_whl_library_build_bazel( repo_prefix: the repo prefix that should be used for dependency lists. whl_name: the whl_name that this is generated for. dependencies: a list of PyPI packages that are dependencies to the py_library. + dependencies_by_platform: a dict[str, list] of PyPI packages that may vary by platform. data_exclude: more patterns to exclude from the data attribute of generated py_library rules. tags: list of tags to apply to generated py_library rules. entry_points: A dict of entry points to add py_binary rules for. @@ -138,6 +165,10 @@ def generate_whl_library_build_bazel( srcs_exclude = [] data_exclude = [] + data_exclude dependencies = sorted([normalize_name(d) for d in dependencies]) + dependencies_by_platform = { + platform: sorted([normalize_name(d) for d in deps]) + for platform, deps in dependencies_by_platform.items() + } tags = sorted(tags) for entry_point, entry_point_script_name in entry_points.items(): @@ -185,22 +216,48 @@ def generate_whl_library_build_bazel( for d in group_deps } - # Filter out deps which are within the group to avoid cycles - non_group_deps = [ + dependencies = [ d for d in dependencies if d not in group_deps ] + dependencies_by_platform = { + p: deps + for p, deps in dependencies_by_platform.items() + for deps in [[d for d in deps if d not in group_deps]] + if deps + } - lib_dependencies = [ - "@%s%s//:%s" % (repo_prefix, normalize_name(d), PY_LIBRARY_PUBLIC_LABEL) - for d in non_group_deps - ] + for p in dependencies_by_platform: + if p.startswith("@"): + continue - whl_file_deps = [ - "@%s%s//:%s" % (repo_prefix, normalize_name(d), WHEEL_FILE_PUBLIC_LABEL) - for d in non_group_deps - ] + os, _, cpu = p.partition("_") + + additional_content.append( + """\ +config_setting( + name = "is_{os}_{cpu}", + constraint_values = [ + "@platforms//cpu:{cpu}", + "@platforms//os:{os}", + ], + visibility = ["//visibility:private"], +) +""".format(os = os, cpu = cpu), + ) + + lib_dependencies = _render_list_and_select( + deps = dependencies, + deps_by_platform = dependencies_by_platform, + tmpl = "@{}{{}}//:{}".format(repo_prefix, PY_LIBRARY_PUBLIC_LABEL), + ) + + whl_file_deps = _render_list_and_select( + deps = dependencies, + deps_by_platform = dependencies_by_platform, + tmpl = "@{}{{}}//:{}".format(repo_prefix, WHEEL_FILE_PUBLIC_LABEL), + ) # If this library is a member of a group, its public label aliases need to # point to the group implementation rule not the implementation rules. We @@ -223,13 +280,13 @@ def generate_whl_library_build_bazel( py_library_public_label = PY_LIBRARY_PUBLIC_LABEL, py_library_impl_label = PY_LIBRARY_IMPL_LABEL, py_library_actual_label = library_impl_label, - dependencies = repr(lib_dependencies), + dependencies = render.indent(lib_dependencies, " " * 4).lstrip(), + whl_file_deps = render.indent(whl_file_deps, " " * 4).lstrip(), data_exclude = repr(_data_exclude), whl_name = whl_name, whl_file_public_label = WHEEL_FILE_PUBLIC_LABEL, whl_file_impl_label = WHEEL_FILE_IMPL_LABEL, whl_file_actual_label = whl_impl_label, - whl_file_deps = repr(whl_file_deps), tags = repr(tags), data_label = DATA_LABEL, dist_info_label = DIST_INFO_LABEL, diff --git a/python/pip_install/tools/wheel_installer/BUILD.bazel b/python/pip_install/tools/wheel_installer/BUILD.bazel index 0eadcc25f6..a396488d3d 100644 --- a/python/pip_install/tools/wheel_installer/BUILD.bazel +++ b/python/pip_install/tools/wheel_installer/BUILD.bazel @@ -13,6 +13,7 @@ py_library( deps = [ requirement("installer"), requirement("pip"), + requirement("packaging"), requirement("setuptools"), ], ) @@ -47,6 +48,18 @@ py_test( ], ) +py_test( + name = "wheel_test", + size = "small", + srcs = [ + "wheel_test.py", + ], + data = ["//examples/wheel:minimal_with_py_package"], + deps = [ + ":lib", + ], +) + py_test( name = "wheel_installer_test", size = "small", diff --git a/python/pip_install/tools/wheel_installer/arguments.py b/python/pip_install/tools/wheel_installer/arguments.py index 25fd30f879..71133c29ca 100644 --- a/python/pip_install/tools/wheel_installer/arguments.py +++ b/python/pip_install/tools/wheel_installer/arguments.py @@ -15,7 +15,9 @@ import argparse import json import pathlib -from typing import Any +from typing import Any, Dict, Set + +from python.pip_install.tools.wheel_installer import wheel def parser(**kwargs: Any) -> argparse.ArgumentParser: @@ -39,6 +41,12 @@ def parser(**kwargs: Any) -> argparse.ArgumentParser: action="store", help="Extra arguments to pass down to pip.", ) + parser.add_argument( + "--platform", + action="extend", + type=wheel.Platform.from_string, + help="Platforms to target dependencies. Can be used multiple times.", + ) parser.add_argument( "--pip_data_exclude", action="store", @@ -68,8 +76,9 @@ def parser(**kwargs: Any) -> argparse.ArgumentParser: return parser -def deserialize_structured_args(args): +def deserialize_structured_args(args: Dict[str, str]) -> Dict: """Deserialize structured arguments passed from the starlark rules. + Args: args: dict of parsed command line arguments """ @@ -80,3 +89,18 @@ def deserialize_structured_args(args): else: args[arg_name] = [] return args + + +def get_platforms(args: argparse.Namespace) -> Set: + """Aggregate platforms into a single set. + + Args: + args: dict of parsed command line arguments + """ + platforms = set() + if args.platform is None: + return platforms + + platforms.update(args.platform) + + return platforms diff --git a/python/pip_install/tools/wheel_installer/arguments_test.py b/python/pip_install/tools/wheel_installer/arguments_test.py index 7193f4a2dc..840c2fa6cc 100644 --- a/python/pip_install/tools/wheel_installer/arguments_test.py +++ b/python/pip_install/tools/wheel_installer/arguments_test.py @@ -16,7 +16,7 @@ import json import unittest -from python.pip_install.tools.wheel_installer import arguments +from python.pip_install.tools.wheel_installer import arguments, wheel class ArgumentsTestCase(unittest.TestCase): @@ -52,6 +52,18 @@ def test_deserialize_structured_args(self) -> None: self.assertEqual(args["environment"], {"PIP_DO_SOMETHING": "True"}) self.assertEqual(args["extra_pip_args"], []) + def test_platform_aggregation(self) -> None: + parser = arguments.parser() + args = parser.parse_args( + args=[ + "--platform=host", + "--platform=linux_*", + "--platform=all", + "--requirement=foo", + ] + ) + self.assertEqual(set(wheel.Platform.all()), arguments.get_platforms(args)) + if __name__ == "__main__": unittest.main() diff --git a/python/pip_install/tools/wheel_installer/wheel.py b/python/pip_install/tools/wheel_installer/wheel.py index 84af04ca59..9c18dfde80 100644 --- a/python/pip_install/tools/wheel_installer/wheel.py +++ b/python/pip_install/tools/wheel_installer/wheel.py @@ -13,18 +13,405 @@ # limitations under the License. """Utility class to inspect an extracted wheel directory""" + import email -from typing import Dict, Optional, Set, Tuple +import platform +import re +import sys +from collections import defaultdict +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, Union import installer -import pkg_resources +from packaging.requirements import Requirement from pip._vendor.packaging.utils import canonicalize_name +class OS(Enum): + linux = 1 + osx = 2 + windows = 3 + darwin = osx + win32 = windows + + @staticmethod + def from_tag(tag: str) -> "OS": + if tag.startswith("linux"): + return OS.linux + elif tag.startswith("manylinux"): + return OS.linux + elif tag.startswith("musllinux"): + return OS.linux + elif tag.startswith("macos"): + return OS.osx + elif tag.startswith("win"): + return OS.windows + else: + raise ValueError(f"unknown tag: {tag}") + + +class Arch(Enum): + x86_64 = 1 + x86_32 = 2 + aarch64 = 3 + ppc = 4 + s390x = 5 + amd64 = x86_64 + arm64 = aarch64 + i386 = x86_32 + i686 = x86_32 + x86 = x86_32 + ppc64le = ppc + + @staticmethod + def from_tag(tag: str) -> "Arch": + for s, value in Arch.__members__.items(): + if s in tag: + return value + + if tag == "win32": + return Arch.x86_32 + else: + raise ValueError(f"unknown tag: {tag}") + + +@dataclass(frozen=True) +class Platform: + os: OS + arch: Optional[Arch] = None + + @classmethod + def all(cls, want_os: Optional[OS] = None) -> List["Platform"]: + return sorted( + [ + cls(os=os, arch=arch) + for os in OS + for arch in Arch + if not want_os or want_os == os + ] + ) + + @classmethod + def host(cls) -> List["Platform"]: + """Use the Python interpreter to detect the platform. + + We extract `os` from sys.platform and `arch` from platform.machine + + Returns: + A list of parsed values which makes the signature the same as + `Platform.all` and `Platform.from_string`. + """ + return [ + cls( + os=OS[sys.platform.lower()], + # FIXME @aignas 2023-12-13: Hermetic toolchain on Windows 3.11.6 + # is returning an empty string here, so lets default to x86_64 + arch=Arch[platform.machine().lower() or "x86_64"], + ) + ] + + def __lt__(self, other: Any) -> bool: + """Add a comparison method, so that `sorted` returns the most specialized platforms first.""" + if not isinstance(other, Platform) or other is None: + raise ValueError(f"cannot compare {other} with Platform") + + if self.arch is None and other.arch is not None: + return True + + if self.arch is not None and other.arch is None: + return True + + # Here we ensure that we sort by OS before sorting by arch + + if self.arch is None and other.arch is None: + return self.os.value < other.os.value + + if self.os.value < other.os.value: + return True + + if self.os.value == other.os.value: + return self.arch.value < other.arch.value + + return False + + def __str__(self) -> str: + if self.arch is None: + return f"@platforms//os:{self.os.name.lower()}" + + return self.os.name.lower() + "_" + self.arch.name.lower() + + @classmethod + def from_tag(cls, tag: str) -> "Platform": + return cls( + os=OS.from_tag(tag), + arch=Arch.from_tag(tag), + ) + + @classmethod + def from_string(cls, platform: Union[str, List[str]]) -> List["Platform"]: + """Parse a string and return a list of platforms""" + platform = [platform] if isinstance(platform, str) else list(platform) + ret = set() + for p in platform: + if p == "host": + ret.update(cls.host()) + elif p == "all": + ret.update(cls.all()) + elif p.endswith("*"): + os, _, _ = p.partition("_") + ret.update(cls.all(OS[os])) + else: + os, _, arch = p.partition("_") + ret.add(cls(os=OS[os], arch=Arch[arch])) + + return sorted(ret) + + # NOTE @aignas 2023-12-05: below is the minimum number of accessors that are defined in + # https://peps.python.org/pep-0496/ to make rules_python generate dependencies. + # + # WARNING: It may not work in cases where the python implementation is different between + # different platforms. + + # derived from OS + @property + def os_name(self) -> str: + if self.os == OS.linux or self.os == OS.osx: + return "posix" + elif self.os == OS.windows: + return "nt" + else: + return "" + + @property + def sys_platform(self) -> str: + if self.os == OS.linux: + return "linux" + elif self.os == OS.osx: + return "darwin" + elif self.os == OS.windows: + return "win32" + else: + return "" + + @property + def platform_system(self) -> str: + if self.os == OS.linux: + return "Linux" + elif self.os == OS.osx: + return "Darwin" + elif self.os == OS.windows: + return "Windows" + + # derived from OS and Arch + @property + def platform_machine(self) -> str: + """Guess the target 'platform_machine' marker. + + NOTE @aignas 2023-12-05: this may not work on really new systems, like + Windows if they define the platform markers in a different way. + """ + if self.arch == Arch.x86_64: + return "x86_64" + elif self.arch == Arch.x86_32 and self.os != OS.osx: + return "i386" + elif self.arch == Arch.x86_32: + return "" + elif self.arch == Arch.aarch64 and self.os == OS.linux: + return "aarch64" + elif self.arch == Arch.aarch64: + # Assuming that OSX and Windows use this one since the precedent is set here: + # https://github.com/cgohlke/win_arm64-wheels + return "arm64" + elif self.os != OS.linux: + return "" + elif self.arch == Arch.ppc64le: + return "ppc64le" + elif self.arch == Arch.s390x: + return "s390x" + else: + return "" + + def env_markers(self, extra: str) -> Dict[str, str]: + return { + "extra": extra, + "os_name": self.os_name, + "sys_platform": self.sys_platform, + "platform_machine": self.platform_machine, + "platform_system": self.platform_system, + "platform_release": "", # unset + "platform_version": "", # unset + # we assume that the following are the same as the interpreter used to setup the deps: + # "implementation_version": "X.Y.Z", + # "implementation_name": "cpython" + # "python_version": "X.Y", + # "python_full_version": "X.Y.Z", + # "platform_python_implementation: "CPython", + } + + +@dataclass(frozen=True) +class FrozenDeps: + deps: List[str] + deps_select: Dict[str, List[str]] + + +class Deps: + def __init__( + self, + name: str, + extras: Optional[Set[str]] = None, + platforms: Optional[Set[Platform]] = None, + ): + self.name: str = Deps._normalize(name) + self._deps: Set[str] = set() + self._select: Dict[Platform, Set[str]] = defaultdict(set) + self._want_extras: Set[str] = extras or {""} # empty strings means no extras + self._platforms: Set[Platform] = platforms or set() + + def _add(self, dep: str, platform: Optional[Platform]): + dep = Deps._normalize(dep) + + # Packages may create dependency cycles when specifying optional-dependencies / 'extras'. + # Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32. + if dep == self.name: + return + + if platform: + self._select[platform].add(dep) + else: + self._deps.add(dep) + + @staticmethod + def _normalize(name: str) -> str: + return re.sub(r"[-_.]+", "_", name).lower() + + def add(self, *wheel_reqs: str) -> None: + reqs = [Requirement(wheel_req) for wheel_req in wheel_reqs] + + # Resolve any extra extras due to self-edges + self._want_extras = self._resolve_extras(reqs) + + # process self-edges first to resolve the extras used + for req in reqs: + self._add_req(req) + + def _resolve_extras(self, reqs: List[Requirement]) -> Set[str]: + """Resolve extras which are due to depending on self[some_other_extra]. + + Some packages may have cyclic dependencies resulting from extras being used, one example is + `elint`, where we have one set of extras as aliases for other extras + and we have an extra called 'all' that includes all other extras. + + When the `requirements.txt` is generated by `pip-tools`, then it is likely that + this step is not needed, but for other `requirements.txt` files this may be useful. + + NOTE @aignas 2023-12-08: the extra resolution is not platform dependent, but + in order for it to become platform dependent we would have to have separate targets for each extra in + self._want_extras. + """ + extras = self._want_extras + + self_reqs = [] + for req in reqs: + if Deps._normalize(req.name) != self.name: + continue + + if req.marker is None: + # I am pretty sure we cannot reach this code as it does not + # make sense to specify packages in this way, but since it is + # easy to handle, lets do it. + # + # TODO @aignas 2023-12-08: add a test + extras = extras | req.extras + else: + # process these in a separate loop + self_reqs.append(req) + + # A double loop is not strictly optimal, but always correct without recursion + for req in self_reqs: + if any(req.marker.evaluate({"extra": extra}) for extra in extras): + extras = extras | req.extras + else: + continue + + # Iterate through all packages to ensure that we include all of the extras from previously + # visited packages. + for req_ in self_reqs: + if any(req_.marker.evaluate({"extra": extra}) for extra in extras): + extras = extras | req_.extras + + return extras + + def _add_req(self, req: Requirement) -> None: + extras = self._want_extras + + if req.marker is None: + self._add(req.name, None) + return + + marker_str = str(req.marker) + + # NOTE @aignas 2023-12-08: in order to have reasonable select statements + # we do have to have some parsing of the markers, so it begs the question + # if packaging should be reimplemented in Starlark to have the best solution + # for now we will implement it in Python and see what the best parsing result + # can be before making this decision. + if not self._platforms or not any( + tag in marker_str + for tag in [ + "os_name", + "sys_platform", + "platform_machine", + "platform_system", + ] + ): + if any(req.marker.evaluate({"extra": extra}) for extra in extras): + self._add(req.name, None) + return + + for plat in self._platforms: + if not any( + req.marker.evaluate(plat.env_markers(extra)) for extra in extras + ): + continue + + if "platform_machine" in marker_str: + self._add(req.name, plat) + else: + self._add(req.name, Platform(plat.os)) + + def build(self) -> FrozenDeps: + if not self._select: + return FrozenDeps( + deps=sorted(self._deps), + deps_select={}, + ) + + # Get all of the OS-specific dependencies applicable to all architectures + select = { + p: deps for p, deps in self._select.items() if deps and p.arch is None + } + # Now add them to all arch specific dependencies + select.update( + { + p: deps | select.get(Platform(p.os), set()) + for p, deps in self._select.items() + if deps and p.arch is not None + } + ) + + return FrozenDeps( + deps=sorted(self._deps), + deps_select={str(p): sorted(deps) for p, deps in sorted(select.items())}, + ) + + class Wheel: """Representation of the compressed .whl file""" - def __init__(self, path: str): + def __init__(self, path: Path): self._path = path @property @@ -70,19 +457,31 @@ def entry_points(self) -> Dict[str, Tuple[str, str]]: return entry_points_mapping - def dependencies(self, extras_requested: Optional[Set[str]] = None) -> Set[str]: - dependency_set = set() + def dependencies( + self, + extras_requested: Set[str] = None, + platforms: Optional[Set[Platform]] = None, + ) -> FrozenDeps: + if platforms: + # NOTE @aignas 2023-12-04: if the wheel is a platform specific wheel, we only include deps for that platform + _, _, platform_tag = self._path.name.rpartition("-") + platform_tag = platform_tag[:-4] # strip .whl + if platform_tag != "any": + platform = Platform.from_tag(platform_tag) + assert ( + platform in platforms + ), f"BUG: wheel platform '{platform}' must be one of '{platforms}'" + platforms = {platform} + dependency_set = Deps( + self.name, + extras=extras_requested, + platforms=platforms, + ) for wheel_req in self.metadata.get_all("Requires-Dist", []): - req = pkg_resources.Requirement(wheel_req) # type: ignore - - if req.marker is None or any( - req.marker.evaluate({"extra": extra}) - for extra in extras_requested or [""] - ): - dependency_set.add(req.name) # type: ignore + dependency_set.add(wheel_req) - return dependency_set + return dependency_set.build() def unzip(self, directory: str) -> None: installation_schemes = { diff --git a/python/pip_install/tools/wheel_installer/wheel_installer.py b/python/pip_install/tools/wheel_installer/wheel_installer.py index f5ed8c3db8..801ef959f0 100644 --- a/python/pip_install/tools/wheel_installer/wheel_installer.py +++ b/python/pip_install/tools/wheel_installer/wheel_installer.py @@ -14,19 +14,16 @@ """Build and/or fetch a single wheel based on the requirement passed in""" -import argparse import errno import glob import json import os import re -import shutil import subprocess import sys -import textwrap from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Dict, Iterable, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set, Tuple from pip._vendor.packaging.utils import canonicalize_name @@ -108,6 +105,7 @@ def _extract_wheel( wheel_file: str, extras: Dict[str, Set[str]], enable_implicit_namespace_pkgs: bool, + platforms: List[wheel.Platform], installation_dir: Path = Path("."), ) -> None: """Extracts wheel into given directory and creates py_library and filegroup targets. @@ -126,16 +124,15 @@ def _extract_wheel( _setup_namespace_pkg_compatibility(installation_dir) extras_requested = extras[whl.name] if whl.name in extras else set() - # Packages may create dependency cycles when specifying optional-dependencies / 'extras'. - # Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32. - self_edge_dep = set([whl.name]) - whl_deps = sorted(whl.dependencies(extras_requested) - self_edge_dep) + + dependencies = whl.dependencies(extras_requested, platforms) with open(os.path.join(installation_dir, "metadata.json"), "w") as f: metadata = { "name": whl.name, "version": whl.version, - "deps": whl_deps, + "deps": dependencies.deps, + "deps_by_platform": dependencies.deps_select, "entry_points": [ { "name": name, @@ -164,6 +161,7 @@ def main() -> None: wheel_file=whl, extras=extras, enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs, + platforms=arguments.get_platforms(args), ) return diff --git a/python/pip_install/tools/wheel_installer/wheel_installer_test.py b/python/pip_install/tools/wheel_installer/wheel_installer_test.py index b24e50053f..6eacd1fda5 100644 --- a/python/pip_install/tools/wheel_installer/wheel_installer_test.py +++ b/python/pip_install/tools/wheel_installer/wheel_installer_test.py @@ -19,7 +19,7 @@ import unittest from pathlib import Path -from python.pip_install.tools.wheel_installer import wheel_installer +from python.pip_install.tools.wheel_installer import wheel, wheel_installer class TestRequirementExtrasParsing(unittest.TestCase): @@ -55,31 +55,6 @@ def test_parses_requirement_for_extra(self) -> None: ) -# TODO @aignas 2023-07-21: migrate to starlark -# class BazelTestCase(unittest.TestCase): -# def test_generate_entry_point_contents(self): -# got = wheel_installer._generate_entry_point_contents("sphinx.cmd.build", "main") -# want = """#!/usr/bin/env python3 -# import sys -# from sphinx.cmd.build import main -# if __name__ == "__main__": -# sys.exit(main()) -# """ -# self.assertEqual(got, want) -# -# def test_generate_entry_point_contents_with_shebang(self): -# got = wheel_installer._generate_entry_point_contents( -# "sphinx.cmd.build", "main", shebang="#!/usr/bin/python" -# ) -# want = """#!/usr/bin/python -# import sys -# from sphinx.cmd.build import main -# if __name__ == "__main__": -# sys.exit(main()) -# """ -# self.assertEqual(got, want) - - class TestWhlFilegroup(unittest.TestCase): def setUp(self) -> None: self.wheel_name = "example_minimal_package-0.0.1-py3-none-any.whl" @@ -92,10 +67,11 @@ def tearDown(self): def test_wheel_exists(self) -> None: wheel_installer._extract_wheel( - self.wheel_path, + Path(self.wheel_path), installation_dir=Path(self.wheel_dir), extras={}, enable_implicit_namespace_pkgs=False, + platforms=[], ) want_files = [ @@ -119,10 +95,34 @@ def test_wheel_exists(self) -> None: version="0.0.1", name="example-minimal-package", deps=[], + deps_by_platform={}, entry_points=[], ) self.assertEqual(want, metadata_file_content) +class TestWheelPlatform(unittest.TestCase): + def test_wheel_os_alias(self): + self.assertEqual("OS.osx", str(wheel.OS.osx)) + self.assertEqual(str(wheel.OS.darwin), str(wheel.OS.osx)) + + def test_wheel_arch_alias(self): + self.assertEqual("Arch.x86_64", str(wheel.Arch.x86_64)) + self.assertEqual(str(wheel.Arch.amd64), str(wheel.Arch.x86_64)) + + def test_wheel_platform_alias(self): + give = wheel.Platform( + os=wheel.OS.darwin, + arch=wheel.Arch.amd64, + ) + alias = wheel.Platform( + os=wheel.OS.osx, + arch=wheel.Arch.x86_64, + ) + + self.assertEqual("osx_x86_64", str(give)) + self.assertEqual(str(alias), str(give)) + + if __name__ == "__main__": unittest.main() diff --git a/python/pip_install/tools/wheel_installer/wheel_test.py b/python/pip_install/tools/wheel_installer/wheel_test.py new file mode 100644 index 0000000000..57bfa9458a --- /dev/null +++ b/python/pip_install/tools/wheel_installer/wheel_test.py @@ -0,0 +1,235 @@ +import unittest + +from python.pip_install.tools.wheel_installer import wheel + + +class DepsTest(unittest.TestCase): + def test_simple(self): + deps = wheel.Deps("foo") + deps.add("bar") + + got = deps.build() + + self.assertIsInstance(got, wheel.FrozenDeps) + self.assertEqual(["bar"], got.deps) + self.assertEqual({}, got.deps_select) + + def test_can_add_os_specific_deps(self): + platforms = { + "linux_x86_64", + "osx_x86_64", + "windows_x86_64", + } + deps = wheel.Deps("foo", platforms=set(wheel.Platform.from_string(platforms))) + deps.add( + "bar", + "posix_dep; os_name=='posix'", + "win_dep; os_name=='nt'", + ) + + got = deps.build() + + self.assertEqual(["bar"], got.deps) + self.assertEqual( + { + "@platforms//os:linux": ["posix_dep"], + "@platforms//os:osx": ["posix_dep"], + "@platforms//os:windows": ["win_dep"], + }, + got.deps_select, + ) + + def test_can_add_platform_specific_deps(self): + platforms = { + "linux_x86_64", + "osx_x86_64", + "osx_aarch64", + "windows_x86_64", + } + deps = wheel.Deps("foo", platforms=set(wheel.Platform.from_string(platforms))) + deps.add( + "bar", + "posix_dep; os_name=='posix'", + "m1_dep; sys_platform=='darwin' and platform_machine=='arm64'", + "win_dep; os_name=='nt'", + ) + + got = deps.build() + + self.assertEqual(["bar"], got.deps) + self.assertEqual( + { + "osx_aarch64": ["m1_dep", "posix_dep"], + "@platforms//os:linux": ["posix_dep"], + "@platforms//os:osx": ["posix_dep"], + "@platforms//os:windows": ["win_dep"], + }, + got.deps_select, + ) + + def test_non_platform_markers_are_added_to_common_deps(self): + platforms = { + "linux_x86_64", + "osx_x86_64", + "osx_aarch64", + "windows_x86_64", + } + deps = wheel.Deps("foo", platforms=set(wheel.Platform.from_string(platforms))) + deps.add( + "bar", + "baz; implementation_name=='cpython'", + "m1_dep; sys_platform=='darwin' and platform_machine=='arm64'", + ) + + got = deps.build() + + self.assertEqual(["bar", "baz"], got.deps) + self.assertEqual( + { + "osx_aarch64": ["m1_dep"], + }, + got.deps_select, + ) + + def test_self_is_ignored(self): + deps = wheel.Deps("foo", extras={"ssl"}) + deps.add( + "bar", + "req_dep; extra == 'requests'", + "foo[requests]; extra == 'ssl'", + "ssl_lib; extra == 'ssl'", + ) + + got = deps.build() + + self.assertEqual(["bar", "req_dep", "ssl_lib"], got.deps) + self.assertEqual({}, got.deps_select) + + def test_handle_etils(self): + deps = wheel.Deps("etils", extras={"all"}) + requires = """ +etils[array-types] ; extra == "all" +etils[eapp] ; extra == "all" +etils[ecolab] ; extra == "all" +etils[edc] ; extra == "all" +etils[enp] ; extra == "all" +etils[epath] ; extra == "all" +etils[epath-gcs] ; extra == "all" +etils[epath-s3] ; extra == "all" +etils[epy] ; extra == "all" +etils[etqdm] ; extra == "all" +etils[etree] ; extra == "all" +etils[etree-dm] ; extra == "all" +etils[etree-jax] ; extra == "all" +etils[etree-tf] ; extra == "all" +etils[enp] ; extra == "array-types" +pytest ; extra == "dev" +pytest-subtests ; extra == "dev" +pytest-xdist ; extra == "dev" +pyink ; extra == "dev" +pylint>=2.6.0 ; extra == "dev" +chex ; extra == "dev" +torch ; extra == "dev" +optree ; extra == "dev" +dataclass_array ; extra == "dev" +sphinx-apitree[ext] ; extra == "docs" +etils[dev,all] ; extra == "docs" +absl-py ; extra == "eapp" +simple_parsing ; extra == "eapp" +etils[epy] ; extra == "eapp" +jupyter ; extra == "ecolab" +numpy ; extra == "ecolab" +mediapy ; extra == "ecolab" +packaging ; extra == "ecolab" +etils[enp] ; extra == "ecolab" +etils[epy] ; extra == "ecolab" +etils[epy] ; extra == "edc" +numpy ; extra == "enp" +etils[epy] ; extra == "enp" +fsspec ; extra == "epath" +importlib_resources ; extra == "epath" +typing_extensions ; extra == "epath" +zipp ; extra == "epath" +etils[epy] ; extra == "epath" +gcsfs ; extra == "epath-gcs" +etils[epath] ; extra == "epath-gcs" +s3fs ; extra == "epath-s3" +etils[epath] ; extra == "epath-s3" +typing_extensions ; extra == "epy" +absl-py ; extra == "etqdm" +tqdm ; extra == "etqdm" +etils[epy] ; extra == "etqdm" +etils[array_types] ; extra == "etree" +etils[epy] ; extra == "etree" +etils[enp] ; extra == "etree" +etils[etqdm] ; extra == "etree" +dm-tree ; extra == "etree-dm" +etils[etree] ; extra == "etree-dm" +jax[cpu] ; extra == "etree-jax" +etils[etree] ; extra == "etree-jax" +tensorflow ; extra == "etree-tf" +etils[etree] ; extra == "etree-tf" +etils[ecolab] ; extra == "lazy-imports" +""" + + deps.add(*requires.strip().split("\n")) + + got = deps.build() + want = [ + "absl_py", + "dm_tree", + "fsspec", + "gcsfs", + "importlib_resources", + "jax", + "jupyter", + "mediapy", + "numpy", + "packaging", + "s3fs", + "simple_parsing", + "tensorflow", + "tqdm", + "typing_extensions", + "zipp", + ] + + self.assertEqual(want, got.deps) + self.assertEqual({}, got.deps_select) + + +class PlatformTest(unittest.TestCase): + def test_platform_from_string(self): + tests = { + "win_amd64": "windows_x86_64", + "macosx_10_9_arm64": "osx_aarch64", + "manylinux1_i686.manylinux_2_17_i686": "linux_x86_32", + "musllinux_1_1_ppc64le": "linux_ppc", + } + + for give, want in tests.items(): + with self.subTest(give=give, want=want): + self.assertEqual( + wheel.Platform.from_string(want)[0], + wheel.Platform.from_tag(give), + ) + + def test_can_get_host(self): + host = wheel.Platform.host() + self.assertIsNotNone(host) + self.assertEqual(1, len(wheel.Platform.from_string("host"))) + self.assertEqual(host, wheel.Platform.from_string("host")) + + def test_can_get_all(self): + all_platforms = wheel.Platform.all() + self.assertEqual(15, len(all_platforms)) + self.assertEqual(all_platforms, wheel.Platform.from_string("all")) + + def test_can_get_all_for_os(self): + linuxes = wheel.Platform.all(wheel.OS.linux) + self.assertEqual(5, len(linuxes)) + self.assertEqual(linuxes, wheel.Platform.from_string("linux_*")) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 305039fb2e..3735ed84b3 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -158,6 +158,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): p: json.encode(args) for p, args in whl_overrides.get(whl_name, {}).items() }, + experimental_target_platforms = pip_attr.experimental_target_platforms, python_interpreter = pip_attr.python_interpreter, python_interpreter_target = python_interpreter_target, quiet = pip_attr.quiet, diff --git a/python/private/text_util.bzl b/python/private/text_util.bzl index da67001ce8..a1bec5e411 100644 --- a/python/private/text_util.bzl +++ b/python/private/text_util.bzl @@ -28,18 +28,18 @@ def _render_alias(name, actual): ")", ]) -def _render_dict(d): +def _render_dict(d, *, value_repr = repr): return "\n".join([ "{", _indent("\n".join([ - "{}: {},".format(repr(k), repr(v)) + "{}: {},".format(repr(k), value_repr(v)) for k, v in d.items() ])), "}", ]) -def _render_select(selects, *, no_match_error = None): - dict_str = _render_dict(selects) + "," +def _render_select(selects, *, no_match_error = None, value_repr = repr): + dict_str = _render_dict(selects, value_repr = value_repr) + "," if no_match_error: args = "\n".join([ @@ -58,6 +58,12 @@ def _render_select(selects, *, no_match_error = None): return "select({})".format(args) def _render_list(items): + if not items: + return "[]" + + if len(items) == 1: + return "[{}]".format(repr(items[0])) + return "\n".join([ "[", _indent("\n".join([ diff --git a/tests/pip_install/whl_library/generate_build_bazel_tests.bzl b/tests/pip_install/whl_library/generate_build_bazel_tests.bzl index c65beb54ae..b89477fd4c 100644 --- a/tests/pip_install/whl_library/generate_build_bazel_tests.bzl +++ b/tests/pip_install/whl_library/generate_build_bazel_tests.bzl @@ -39,7 +39,15 @@ filegroup( filegroup( name = "_whl", srcs = ["foo.whl"], - data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"], + data = [ + "@pypi_bar_baz//:whl", + "@pypi_foo//:whl", + ] + select( + { + "@platforms//os:windows": ["@pypi_colorama//:whl"], + "//conditions:default": [], + }, + ), visibility = ["//visibility:private"], ) @@ -59,7 +67,15 @@ py_library( # This makes this directory a top-level in the python import # search path for anything that depends on this. imports = ["site-packages"], - deps = ["@pypi_bar_baz//:pkg", "@pypi_foo//:pkg"], + deps = [ + "@pypi_bar_baz//:pkg", + "@pypi_foo//:pkg", + ] + select( + { + "@platforms//os:windows": ["@pypi_colorama//:pkg"], + "//conditions:default": [], + }, + ), tags = ["tag1", "tag2"], visibility = ["//visibility:private"], ) @@ -78,6 +94,7 @@ alias( repo_prefix = "pypi_", whl_name = "foo.whl", dependencies = ["foo", "bar-baz"], + dependencies_by_platform = {"@platforms//os:windows": ["colorama"]}, data_exclude = [], tags = ["tag1", "tag2"], entry_points = {}, @@ -107,7 +124,10 @@ filegroup( filegroup( name = "_whl", srcs = ["foo.whl"], - data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"], + data = [ + "@pypi_bar_baz//:whl", + "@pypi_foo//:whl", + ], visibility = ["//visibility:private"], ) @@ -127,7 +147,10 @@ py_library( # This makes this directory a top-level in the python import # search path for anything that depends on this. imports = ["site-packages"], - deps = ["@pypi_bar_baz//:pkg", "@pypi_foo//:pkg"], + deps = [ + "@pypi_bar_baz//:pkg", + "@pypi_foo//:pkg", + ], tags = ["tag1", "tag2"], visibility = ["//visibility:private"], ) @@ -162,6 +185,7 @@ copy_file( repo_prefix = "pypi_", whl_name = "foo.whl", dependencies = ["foo", "bar-baz"], + dependencies_by_platform = {}, data_exclude = [], tags = ["tag1", "tag2"], entry_points = {}, @@ -198,7 +222,10 @@ filegroup( filegroup( name = "_whl", srcs = ["foo.whl"], - data = ["@pypi_bar_baz//:whl", "@pypi_foo//:whl"], + data = [ + "@pypi_bar_baz//:whl", + "@pypi_foo//:whl", + ], visibility = ["//visibility:private"], ) @@ -218,7 +245,10 @@ py_library( # This makes this directory a top-level in the python import # search path for anything that depends on this. imports = ["site-packages"], - deps = ["@pypi_bar_baz//:pkg", "@pypi_foo//:pkg"], + deps = [ + "@pypi_bar_baz//:pkg", + "@pypi_foo//:pkg", + ], tags = ["tag1", "tag2"], visibility = ["//visibility:private"], ) @@ -246,6 +276,7 @@ py_binary( repo_prefix = "pypi_", whl_name = "foo.whl", dependencies = ["foo", "bar-baz"], + dependencies_by_platform = {}, data_exclude = [], tags = ["tag1", "tag2"], entry_points = {"fizz": "buzz.py"}, @@ -275,7 +306,16 @@ filegroup( filegroup( name = "_whl", srcs = ["foo.whl"], - data = ["@pypi_bar_baz//:whl"], + data = ["@pypi_bar_baz//:whl"] + select( + { + ":is_linux_x86_64": [ + "@pypi_box//:whl", + "@pypi_box_amd64//:whl", + ], + "@platforms//os:linux": ["@pypi_box//:whl"], + "//conditions:default": [], + }, + ), visibility = ["@pypi__groups//:__pkg__"], ) @@ -295,7 +335,16 @@ py_library( # This makes this directory a top-level in the python import # search path for anything that depends on this. imports = ["site-packages"], - deps = ["@pypi_bar_baz//:pkg"], + deps = ["@pypi_bar_baz//:pkg"] + select( + { + ":is_linux_x86_64": [ + "@pypi_box//:pkg", + "@pypi_box_amd64//:pkg", + ], + "@platforms//os:linux": ["@pypi_box//:pkg"], + "//conditions:default": [], + }, + ), tags = [], visibility = ["@pypi__groups//:__pkg__"], ) @@ -309,17 +358,31 @@ alias( name = "whl", actual = "@pypi__groups//:qux_whl", ) + +config_setting( + name = "is_linux_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + visibility = ["//visibility:private"], +) """ actual = generate_whl_library_build_bazel( repo_prefix = "pypi_", whl_name = "foo.whl", dependencies = ["foo", "bar-baz", "qux"], + dependencies_by_platform = { + "linux_x86_64": ["box", "box-amd64"], + "windows_x86_64": ["fox"], + "@platforms//os:linux": ["box"], # buildifier: disable=unsorted-dict-items + }, tags = [], entry_points = {}, data_exclude = [], annotation = None, group_name = "qux", - group_deps = ["foo", "qux"], + group_deps = ["foo", "fox", "qux"], ) env.expect.that_str(actual).equals(want)