Skip to content

Commit

Permalink
[internal] Replace pkgutil.get_data with new read_resource API (#…
Browse files Browse the repository at this point in the history
…16379)

Pants uses `pkgutil.get_data()` in all manner of places to load resource files that are included within the Pants Python package. `get_data()` is not supported in the PEP302 pluggable importer used by PyOxidizer. `importlib.resources`, however, _is_ supported.

`importlib.resources` (as far as PyOxidizer is concerned, at the very least) has some caveats:

* Resources can only be loaded from packages that contain an `__init__.py` file (i.e. are a non-namespace package)
* Resources cannot be `.py` files

This adds a shim function called `read_resource()` that allows reading resource files with more-or-less the same API as `pkgutil.get_data()` (in particular, allowing `read_resource(__name__, …)`, which is used commonly), but plugs into `importlib.resources.read_binary()` to allow for better portability.

A symlink to `src/python/pants/VERSION` is added at `src/python/pants/_version/VERSION`, so that the resource can be loaded with the new API, without immediately breaking the hard-coded version file location required by our setup script.

Finally, the python dependency parser has been renamed to `dependency_parser_py`, so that it can be loaded as a resource, and still treated as a `python_source` for Pants linting purposes.

Addresses #7369.
  • Loading branch information
Christopher Neugebauer authored Aug 4, 2022
1 parent 671cf3e commit a01f375
Show file tree
Hide file tree
Showing 17 changed files with 93 additions and 21 deletions.
6 changes: 4 additions & 2 deletions src/python/pants/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ python_sources(
overrides={
# Enable `python -m pants ...` style execution ala `json.tool` or `venv`.
"__main__.py": {"dependencies": ["src/python/pants/bin:pants_loader"]},
"version.py": {"dependencies": ["./VERSION:resources"]},
},
)

python_test_utils(name="test_utils")

python_distribution(
name="pants-packaged",
dependencies=["./__main__.py", ":resources"],
dependencies=[
"./__main__.py",
":resources",
],
# Because we have native code, this will cause the wheel to use whatever the ABI is for the
# interpreter used to run setup.py, e.g. `cp36m-macosx_10_15_x86_64`.
sdist=False,
Expand Down
12 changes: 12 additions & 0 deletions src/python/pants/_version/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# This package is used to provide a non-namespaced Python package from which
# `importlib.resources.read_binary` can read our `VERSION` file. The `VERSION` file is a
# symlink to the concrete `VERSION` file in `src/python/pants`. This creates a minimal package
# that can be imported by `pants.version` during tests, and inferred by Pants as a dependency.
# Future versions of `importlib.resources` will be able to read resources from namespace
# packages, at which point, this package will no longer need to exist.

python_sources(dependencies=["./VERSION:resources"])
resources(name="resources", sources=["VERSION"])
1 change: 1 addition & 0 deletions src/python/pants/_version/VERSION
Empty file.
4 changes: 2 additions & 2 deletions src/python/pants/backend/codegen/protobuf/scala/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import annotations

import os
import pkgutil
from dataclasses import dataclass

from pants.backend.codegen import export_codegen_goal
Expand Down Expand Up @@ -55,6 +54,7 @@
from pants.source.source_root import SourceRoot, SourceRootRequest
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet
from pants.util.resources import read_resource


class GenerateScalaFromProtobufRequest(GenerateSourcesRequest):
Expand Down Expand Up @@ -245,7 +245,7 @@ async def setup_scalapb_shim_classfiles(
) -> ScalaPBShimCompiledClassfiles:
dest_dir = "classfiles"

scalapb_shim_content = pkgutil.get_data(
scalapb_shim_content = read_resource(
"pants.backend.codegen.protobuf.scala", "ScalaPBShim.scala"
)
if not scalapb_shim_content:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from __future__ import annotations

import json
import pkgutil
from dataclasses import dataclass
from pathlib import PurePath

Expand Down Expand Up @@ -32,6 +31,7 @@
from pants.engine.unions import UnionRule
from pants.util.docutil import git_url
from pants.util.logging import LogLevel
from pants.util.resources import read_resource

_DOCKERFILE_SANDBOX_TOOL = "dockerfile_wrapper_script.py"
_DOCKERFILE_PACKAGE = "pants.backend.docker.subsystems"
Expand Down Expand Up @@ -74,7 +74,7 @@ class ParserSetup:

@rule
async def setup_parser(dockerfile_parser: DockerfileParser) -> ParserSetup:
parser_script_content = pkgutil.get_data(_DOCKERFILE_PACKAGE, _DOCKERFILE_SANDBOX_TOOL)
parser_script_content = read_resource(_DOCKERFILE_PACKAGE, _DOCKERFILE_SANDBOX_TOOL)
if not parser_script_content:
raise ValueError(
f"Unable to find source to {_DOCKERFILE_SANDBOX_TOOL!r} in {_DOCKERFILE_PACKAGE}."
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/go/go_sources/load_go_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from __future__ import annotations

import pkgutil
from dataclasses import dataclass

from pants.backend.go.util_rules.build_pkg import BuildGoPackageRequest, BuiltGoPackage
Expand All @@ -12,6 +11,7 @@
from pants.engine.engine_aware import EngineAwareParameter
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.util.resources import read_resource


@dataclass(frozen=True)
Expand All @@ -31,7 +31,7 @@ def debug_hint(self) -> str:

def setup_files(dir_name: str, file_names: tuple[str, ...]) -> tuple[FileContent, ...]:
def get_file(file_name: str) -> bytes:
content = pkgutil.get_data(f"pants.backend.go.go_sources.{dir_name}", file_name)
content = read_resource(f"pants.backend.go.go_sources.{dir_name}", file_name)
if not content:
raise AssertionError(f"Unable to find resource for `{file_name}`.")
return content
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
import os
import pkgutil
from dataclasses import dataclass
from typing import Any, Iterator

Expand All @@ -25,6 +24,7 @@
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet
from pants.util.resources import read_resource

_PARSER_KOTLIN_VERSION = "1.6.20"

Expand Down Expand Up @@ -230,7 +230,7 @@ async def resolve_fallible_result_to_analysis(
async def setup_kotlin_parser_classfiles(jdk: InternalJdk) -> KotlinParserCompiledClassfiles:
dest_dir = "classfiles"

parser_source_content = pkgutil.get_data(
parser_source_content = read_resource(
"pants.backend.kotlin.dependency_inference", "KotlinParser.kt"
)
if not parser_source_content:
Expand Down
6 changes: 5 additions & 1 deletion src/python/pants/backend/python/dependency_inference/BUILD
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_sources()
python_sources(
dependencies=[
"./scripts:dependency_parser",
],
)

python_tests(
name="tests",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import json
import pkgutil
from dataclasses import dataclass

from pants.backend.python.target_types import PythonSourceField
Expand All @@ -16,6 +15,7 @@
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.resources import read_resource


@dataclass(frozen=True)
Expand Down Expand Up @@ -61,7 +61,7 @@ class ParserScript:

@rule
async def parser_script() -> ParserScript:
script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
script = read_resource(__name__, "scripts/dependency_parser_py")
assert script is not None
return ParserScript(
await Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)]))
Expand Down
15 changes: 13 additions & 2 deletions src/python/pants/backend/python/dependency_inference/scripts/BUILD
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

resource(name="dependency_parser", source="dependency_parser.py")
resource(
name="dependency_parser",
source="dependency_parser_py",
dependencies=[
":init_py",
],
)
resource(
name="init_py",
source="__init__.py",
)

# Also expose scripts as python sources so they get formatted/linted/checked.
python_source(
name="dependency_parser_source",
source="dependency_parser.py",
source="dependency_parser_py",
# This is run with Python 2.7 and 3.5+, so we shouldn't be running pyupgrade.
# skip_pyupgrade=True,
)
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import json
import logging
import os
import pkgutil
from dataclasses import dataclass
from typing import Any, Iterator, Mapping

Expand Down Expand Up @@ -39,6 +38,7 @@
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet
from pants.util.resources import read_resource

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -316,7 +316,7 @@ async def resolve_fallible_result_to_analysis(
async def setup_scala_parser_classfiles(jdk: InternalJdk) -> ScalaParserCompiledClassfiles:
dest_dir = "classfiles"

parser_source_content = pkgutil.get_data(
parser_source_content = read_resource(
"pants.backend.scala.dependency_inference", "ScalaParser.scala"
)
if not parser_source_content:
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/terraform/dependency_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import pkgutil
from dataclasses import dataclass
from pathlib import PurePath

Expand Down Expand Up @@ -33,6 +32,7 @@
from pants.util.docutil import git_url
from pants.util.logging import LogLevel
from pants.util.ordered_set import OrderedSet
from pants.util.resources import read_resource


class TerraformHcl2Parser(PythonToolRequirementsBase):
Expand Down Expand Up @@ -72,7 +72,7 @@ class ParserSetup:

@rule
async def setup_parser(hcl2_parser: TerraformHcl2Parser) -> ParserSetup:
parser_script_content = pkgutil.get_data("pants.backend.terraform", "hcl2_parser.py")
parser_script_content = read_resource("pants.backend.terraform", "hcl2_parser.py")
if not parser_script_content:
raise ValueError("Unable to find source to hcl2_parser.py wrapper script.")

Expand Down
36 changes: 36 additions & 0 deletions src/python/pants/util/resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).


import importlib
from importlib import resources
from itertools import chain


def read_resource(package_or_module: str, resource: str) -> bytes:
"""Reads a resource file from within the Pants package itself.
This helper function is designed for compatibility with `pkgutil.get_data()` wherever possible,
but also allows compability with PEP302 pluggable importers such as included with PyOxidizer.
This requires that resources are loaded from a valid Python package (i.e. must have an
`__init__.py` file in the directory).
"""

a = importlib.import_module(package_or_module)
package_ = a.__package__

if package_ is None:
raise ValueError(
"`read_resource` can only help find resources for packages or modules that live in "
"a package."
)

resource_parts = resource.split("/")

if len(resource_parts) == 1:
package = package_
else:
package = ".".join(chain((package_,), resource_parts[:-1]))
resource = resource_parts[-1]

return resources.read_binary(package, resource)
10 changes: 8 additions & 2 deletions src/python/pants/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os
import pkgutil

from packaging.version import Version

# Generate a inferrable dependency on the `pants._version` package and its associated resources.
import pants._version
from pants.util.resources import read_resource

# Set this env var to override the version pants reports. Useful for testing.
_PANTS_VERSION_OVERRIDE = "_PANTS_VERSION_OVERRIDE"

Expand All @@ -14,7 +17,10 @@
os.environ.get(_PANTS_VERSION_OVERRIDE)
or
# NB: We expect VERSION to always have an entry and want a runtime failure if this is false.
pkgutil.get_data(__name__, "VERSION").decode().strip() # type: ignore[union-attr]
# `pants._version` is a non-namespace package that can be loaded by `importlib.resources` in
# pre-3.11 versions of Python. Once we can read `pants.VERSION` using the resource loader,
# we can remove the `pants._version` package. See #16379.
read_resource(pants._version.__name__, "VERSION").decode().strip()
)

PANTS_SEMVER = Version(VERSION)
Expand Down

0 comments on commit a01f375

Please sign in to comment.