Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Add PEP-658 support #90

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions micropip/_compat_in_pyodide.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from io import BytesIO
from typing import IO
from urllib.parse import urlparse

from pyodide._package_loader import get_dynlibs
Expand All @@ -20,15 +18,15 @@
# Otherwise, this is pytest test collection so let it go.


async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes:
    """Fetch *url* and return its raw contents as ``bytes``.

    Three URL schemes are supported:
      * ``emfs:``  -- read directly from the Emscripten file system,
      * ``file:``  -- load through Pyodide's ``loadBinaryFile``,
      * otherwise  -- fetch over the network via ``pyfetch``,
                      forwarding ``kwargs`` to the request.
    """
    parsed_url = urlparse(url)
    if parsed_url.scheme == "emfs":
        # Bug fix: previously this returned the open file object itself,
        # which violates the declared ``bytes`` return type and breaks
        # callers that wrap the result in BytesIO or hash it directly.
        with open(parsed_url.path, "rb") as f:
            return f.read()
    if parsed_url.scheme == "file":
        result_bytes = (await loadBinaryFile(parsed_url.path)).to_bytes()
    else:
        result_bytes = await (await pyfetch(url, **kwargs)).bytes()
    return result_bytes


async def fetch_string_and_headers(
Expand Down
5 changes: 2 additions & 3 deletions micropip/_compat_not_in_pyodide.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from io import BytesIO
from pathlib import Path
from typing import IO, Any

Expand All @@ -20,9 +19,9 @@ def _fetch(url: str, kwargs: dict[str, Any]) -> addinfourl:
return urlopen(Request(url, **kwargs))


async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes:
    """Fetch *url* with urllib (non-Pyodide fallback) and return the body bytes."""
    return _fetch(url, kwargs=kwargs).read()


async def fetch_string_and_headers(
Expand Down
4 changes: 4 additions & 0 deletions micropip/package_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ def _compatible_wheels(
hashes = file["digests"] if "digests" in file else file["hashes"]
sha256 = hashes.get("sha256")

# Check if the metadata file is available (PEP 658)
data_dist_info_metadata = file.get("data-dist-info-metadata")

# Size of the file in bytes, if available (PEP 700)
# This key is not available in the Simple API HTML response, so this field may be None
size = file.get("size")
Expand All @@ -161,6 +164,7 @@ def _compatible_wheels(
version=version,
sha256=sha256,
size=size,
data_dist_info_metadata=data_dist_info_metadata,
)

releases_compatible = {
Expand Down
19 changes: 17 additions & 2 deletions micropip/transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,24 @@ async def add_wheel(
logger.info(f"Collecting {wheel.name}{specifier}")
logger.info(f" Downloading {wheel.url.split('/')[-1]}")

await wheel.download(self.fetch_kwargs)
wheel_download_task = asyncio.create_task(wheel.download(self.fetch_kwargs))
if self.deps:
await self.gather_requirements(wheel.requires(extras))
# Case 1) If metadata file is available,
# we can gather requirements without waiting for the wheel to be downloaded.
if wheel.pep658_metadata_available():
try:
await wheel.download_pep658_metadata(self.fetch_kwargs)
except OSError:
# If something goes wrong while downloading the metadata,
# we have to wait for the wheel to be downloaded.
await wheel_download_task
await self.gather_requirements(wheel.requires(extras))

# Case 2) If metadata file is not available,
# we have to wait for the wheel to be downloaded.
else:
await wheel_download_task
await self.gather_requirements(wheel.requires(extras))

self.wheels.append(wheel)

Expand Down
98 changes: 67 additions & 31 deletions micropip/wheelinfo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import asyncio
import hashlib
import io
import json
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import IO, Any
from typing import Any
from urllib.parse import ParseResult, urlparse

from packaging.requirements import Requirement
Expand Down Expand Up @@ -36,10 +37,13 @@ class WheelInfo:
parsed_url: ParseResult
sha256: str | None = None
size: int | None = None # Size in bytes, if available (PEP 700)
data_dist_info_metadata: bool | dict[
str, str
] | None = None # Whether the package index exposes the wheel's metadata (PEP 658)

# Fields below are only available after downloading the wheel, i.e. after calling `download()`.

_data: IO[bytes] | None = None # Wheel file contents.
_data: bytes | None = None # Wheel file contents.
_metadata: Metadata | None = None # Wheel metadata.
_requires: list[Requirement] | None = None # List of requirements.

Expand Down Expand Up @@ -77,6 +81,7 @@ def from_package_index(
version: Version,
sha256: str | None,
size: int | None,
data_dist_info_metadata: bool = False,
) -> "WheelInfo":
"""Extract available metadata from response received from package index"""
parsed_url = urlparse(url)
Expand All @@ -92,6 +97,7 @@ def from_package_index(
parsed_url=parsed_url,
sha256=sha256,
size=size,
data_dist_info_metadata=data_dist_info_metadata,
)

async def install(self, target: Path) -> None:
Expand All @@ -109,7 +115,8 @@ async def install(self, target: Path) -> None:
raise RuntimeError(
"Micropip internal error: attempted to install wheel before downloading it?"
)
self._validate()
_validate_sha256_checksum(self._data, self.sha256)

self._extract(target)
await self._load_libraries(target)
self._set_installer()
Expand All @@ -118,10 +125,44 @@ async def download(self, fetch_kwargs: dict[str, Any]):
if self._data is not None:
return

self._data = await self._fetch_bytes(fetch_kwargs)
with zipfile.ZipFile(self._data) as zf:
metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO
self._metadata = Metadata(zipfile.Path(zf, metadata_path))
self._data = await self._fetch_bytes(self.url, fetch_kwargs)

if self._metadata is None:
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
metadata_path = (
wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO
)
self._metadata = Metadata(zipfile.Path(zf, metadata_path))

def pep658_metadata_available(self) -> bool:
    """Return True if the package index serves this wheel's metadata file (PEP 658).

    ``data_dist_info_metadata`` is ``None`` when the index response had no
    ``data-dist-info-metadata`` key at all.  Per PEP 658 / PEP 691 a literal
    ``false`` value likewise means no metadata file is available, so a plain
    truthiness check is required here -- the previous ``is not None`` test
    wrongly reported availability when the value was ``False`` (which is
    also the ``from_package_index`` default).
    """
    return bool(self.data_dist_info_metadata)

async def download_pep658_metadata(
self, fetch_kwargs: dict[str, Any] = None
) -> dict[str, str]:
"""
Download the wheel's metadata exposed via PEP 658.
"""
if fetch_kwargs is None:
fetch_kwargs = {}
if self.data_dist_info_metadata is None:
raise RuntimeError(
"Micropip internal error: the package index does not expose the wheel's metadata via PEP 658."
)

metadata_url = self.url + ".metadata"
data = await self._fetch_bytes(metadata_url, fetch_kwargs)

match self.data_dist_info_metadata:
case {"sha256": checksum}: # sha256 checksum available
_validate_sha256_checksum(data, checksum)
case _: # no checksum available
pass

self._metadata = Metadata(data)

def requires(self, extras: set[str]) -> list[Requirement]:
"""
Expand All @@ -136,9 +177,9 @@ def requires(self, extras: set[str]) -> list[Requirement]:
self._requires = requires
return requires

async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]):
async def _fetch_bytes(self, url: str, fetch_kwargs: dict[str, Any]):
try:
return await fetch_bytes(self.url, fetch_kwargs)
return await fetch_bytes(url, fetch_kwargs)
except OSError as e:
if self.parsed_url.hostname in [
"files.pythonhosted.org",
Expand All @@ -153,20 +194,9 @@ async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]):
"Check if the server is sending the correct 'Access-Control-Allow-Origin' header."
) from e

def _validate(self):
if self.sha256 is None:
# No checksums available, e.g. because installing
# from a different location than PyPI.
return

assert self._data
sha256_actual = _generate_package_hash(self._data)
if sha256_actual != self.sha256:
raise ValueError("Contents don't match hash")

def _extract(self, target: Path) -> None:
assert self._data
with zipfile.ZipFile(self._data) as zf:
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
zf.extractall(target)
self._dist_info = target / wheel_dist_info_dir(zf, self.name)

Expand Down Expand Up @@ -198,16 +228,22 @@ async def _load_libraries(self, target: Path) -> None:
TODO: integrate with pyodide's dynamic library loading mechanism.
"""
assert self._data
dynlibs = get_dynlibs(self._data, ".whl", target)
dynlibs = get_dynlibs(io.BytesIO(self._data), ".whl", target)
await asyncio.gather(*map(lambda dynlib: loadDynlib(dynlib, False), dynlibs))


def _generate_package_hash(data: IO[bytes]) -> str:
"""
Generate a SHA256 hash of the package data.
"""
sha256_hash = hashlib.sha256()
data.seek(0)
while chunk := data.read(4096):
sha256_hash.update(chunk)
return sha256_hash.hexdigest()
def _validate_sha256_checksum(data: bytes, sha256_expected: str | None = None) -> None:
if sha256_expected is None:
# No checksums available, e.g. because installing
# from a different location than PyPI.
return

actual = _generate_package_hash(data)
if actual != sha256_expected:
raise RuntimeError(
f"Invalid checksum: expected {sha256_expected}, got {actual}"
)


def _generate_package_hash(data: bytes) -> str:
return hashlib.sha256(data).hexdigest()
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def write_file(filename, contents):

tmp.seek(0)

return tmp
return tmp.read()


@pytest.fixture
Expand Down
Binary file not shown.
4 changes: 1 addition & 3 deletions tests/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,6 @@ async def run_test(selenium, url, name, version):

@pytest.mark.asyncio
async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
from io import BytesIO

mock_server_fake_package = mock_package_index_json_api(
pkgs=["fake-pkg-micropip-test"]
)
Expand All @@ -381,7 +379,7 @@ async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
async def _mock_fetch_bytes(url, *args):
nonlocal _wheel_url
_wheel_url = url
return BytesIO(b"fake wheel")
return b"fake wheel"

from micropip import wheelinfo

Expand Down
Loading
Loading