diff --git a/micropip/_compat_in_pyodide.py b/micropip/_compat_in_pyodide.py index 02a5068..5d61c4f 100644 --- a/micropip/_compat_in_pyodide.py +++ b/micropip/_compat_in_pyodide.py @@ -1,5 +1,4 @@ -from io import BytesIO -from typing import IO +from pathlib import Path from urllib.parse import urlparse from pyodide._package_loader import get_dynlibs @@ -20,15 +19,14 @@ # Otherwise, this is pytest test collection so let it go. -async def fetch_bytes(url: str, kwargs: dict[str, str]) -> IO[bytes]: +async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes: parsed_url = urlparse(url) if parsed_url.scheme == "emfs": - return open(parsed_url.path, "rb") + return Path(parsed_url.path).read_bytes() if parsed_url.scheme == "file": - result_bytes = (await loadBinaryFile(parsed_url.path)).to_bytes() - else: - result_bytes = await (await pyfetch(url, **kwargs)).bytes() - return BytesIO(result_bytes) + return (await loadBinaryFile(parsed_url.path)).to_bytes() + + return await (await pyfetch(url, **kwargs)).bytes() async def fetch_string_and_headers( diff --git a/micropip/_compat_not_in_pyodide.py b/micropip/_compat_not_in_pyodide.py index c229aa9..bdd81d2 100644 --- a/micropip/_compat_not_in_pyodide.py +++ b/micropip/_compat_not_in_pyodide.py @@ -1,5 +1,4 @@ import re -from io import BytesIO from pathlib import Path from typing import IO, Any @@ -20,9 +19,8 @@ def _fetch(url: str, kwargs: dict[str, Any]) -> addinfourl: return urlopen(Request(url, **kwargs)) -async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> IO[bytes]: - response = _fetch(url, kwargs=kwargs) - return BytesIO(response.read()) +async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes: + return _fetch(url, kwargs=kwargs).read() async def fetch_string_and_headers( diff --git a/micropip/wheelinfo.py b/micropip/wheelinfo.py index e831fdd..75975cf 100644 --- a/micropip/wheelinfo.py +++ b/micropip/wheelinfo.py @@ -1,10 +1,11 @@ import asyncio import hashlib +import io import json import zipfile from dataclasses import dataclass from pathlib import Path -from typing import IO, Any +from typing import Any from urllib.parse import ParseResult, urlparse from packaging.requirements import Requirement @@ -39,7 +40,7 @@ class WheelInfo: # Fields below are only available after downloading the wheel, i.e. after calling `download()`. - _data: IO[bytes] | None = None # Wheel file contents. + _data: bytes | None = None # Wheel file contents. _metadata: Metadata | None = None # Wheel metadata. _requires: list[Requirement] | None = None # List of requirements. @@ -109,7 +110,7 @@ async def install(self, target: Path) -> None: raise RuntimeError( "Micropip internal error: attempted to install wheel before downloading it?" ) - self._validate() + _validate_sha256_checksum(self._data, self.sha256) self._extract(target) await self._load_libraries(target) self._set_installer() @@ -119,7 +120,7 @@ async def download(self, fetch_kwargs: dict[str, Any]): return self._data = await self._fetch_bytes(fetch_kwargs) - with zipfile.ZipFile(self._data) as zf: + with zipfile.ZipFile(io.BytesIO(self._data)) as zf: metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO self._metadata = Metadata(zipfile.Path(zf, metadata_path)) @@ -153,20 +154,9 @@ async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]): "Check if the server is sending the correct 'Access-Control-Allow-Origin' header." ) from e - def _validate(self): - if self.sha256 is None: - # No checksums available, e.g. 
because installing - # from a different location than PyPI. - return - - assert self._data - sha256_actual = _generate_package_hash(self._data) - if sha256_actual != self.sha256: - raise ValueError("Contents don't match hash") - def _extract(self, target: Path) -> None: assert self._data - with zipfile.ZipFile(self._data) as zf: + with zipfile.ZipFile(io.BytesIO(self._data)) as zf: zf.extractall(target) self._dist_info = target / wheel_dist_info_dir(zf, self.name) @@ -198,16 +188,20 @@ async def _load_libraries(self, target: Path) -> None: TODO: integrate with pyodide's dynamic library loading mechanism. """ assert self._data - dynlibs = get_dynlibs(self._data, ".whl", target) + dynlibs = get_dynlibs(io.BytesIO(self._data), ".whl", target) await asyncio.gather(*map(lambda dynlib: loadDynlib(dynlib, False), dynlibs)) -def _generate_package_hash(data: IO[bytes]) -> str: - """ - Generate a SHA256 hash of the package data. - """ - sha256_hash = hashlib.sha256() - data.seek(0) - while chunk := data.read(4096): - sha256_hash.update(chunk) - return sha256_hash.hexdigest() +def _validate_sha256_checksum(data: bytes, expected: str | None = None) -> None: + if expected is None: + # No checksums available, e.g. because installing + # from a different location than PyPI. + return + + actual = _generate_package_hash(data) + if actual != expected: + raise RuntimeError(f"Invalid checksum: expected {expected}, got {actual}") + + +def _generate_package_hash(data: bytes) -> str: + return hashlib.sha256(data).hexdigest() diff --git a/tests/conftest.py b/tests/conftest.py index cca0c4d..0900dc4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -257,7 +257,7 @@ def write_file(filename, contents): tmp.seek(0) - return tmp + return tmp.read() @pytest.fixture diff --git a/tests/test_data/test_wheel_uninstall/pyproject.toml b/tests/test_data/test_wheel_uninstall/pyproject.toml index e72e352..63eb12d 100644 --- a/tests/test_data/test_wheel_uninstall/pyproject.toml +++ b/tests/test_data/test_wheel_uninstall/pyproject.toml @@ -1,11 +1,11 @@ [project] -name = "test_wheel_uninstall" +name = "test-wheel-uninstall" description = "Test wheel uninstall" requires-python = ">=3.10" version = "1.0.0" [tool.setuptools] -packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall"] +packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall", "deep.data"] py-modules = ["top_level"] [tool.setuptools.package-data] diff --git a/tests/test_install.py b/tests/test_install.py index 12848d4..adcae86 100644 --- a/tests/test_install.py +++ b/tests/test_install.py @@ -370,8 +370,6 @@ async def run_test(selenium, url, name, version): @pytest.mark.asyncio async def test_custom_index_urls(mock_package_index_json_api, monkeypatch): - from io import BytesIO - mock_server_fake_package = mock_package_index_json_api( pkgs=["fake-pkg-micropip-test"] ) @@ -381,7 +379,7 @@ async def test_custom_index_urls(mock_package_index_json_api, monkeypatch): async def _mock_fetch_bytes(url, *args): nonlocal _wheel_url _wheel_url = url - return BytesIO(b"fake wheel") + return b"fake wheel" from micropip import wheelinfo diff --git a/tests/test_uninstall.py b/tests/test_uninstall.py index 6958d63..452795a 100644 --- a/tests/test_uninstall.py +++ b/tests/test_uninstall.py @@ -3,9 +3,10 @@ import pytest from pytest_pyodide import run_in_pyodide, spawn_web_server from conftest import SNOWBALL_WHEEL, TEST_WHEEL_DIR -from packaging.utils import parse_wheel_filename +from packaging.utils import parse_wheel_filename, canonicalize_name 
TEST_PACKAGE_NAME = "test_wheel_uninstall" +TEST_PACKAGE_NAME_NORMALIZED = canonicalize_name(TEST_PACKAGE_NAME) @pytest.fixture(scope="module") @@ -19,7 +20,7 @@ def test_wheel_url(test_wheel_path): def test_basic(selenium_standalone_micropip, test_wheel_url): @run_in_pyodide() - async def run(selenium, pkg_name, wheel_url): + async def run(selenium, pkg_name, pkg_name_normalized, wheel_url): import importlib.metadata import sys @@ -27,7 +28,7 @@ async def run(selenium, pkg_name, wheel_url): await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() assert pkg_name not in sys.modules __import__(pkg_name) @@ -52,7 +53,12 @@ async def run(selenium, pkg_name, wheel_url): # 3. Check that the module is not available with micropip.list() assert pkg_name not in micropip.list() - run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url) + run( + selenium_standalone_micropip, + TEST_PACKAGE_NAME, + TEST_PACKAGE_NAME_NORMALIZED, + test_wheel_url, + ) def test_files(selenium_standalone_micropip, test_wheel_url): @@ -61,13 +67,13 @@ def test_files(selenium_standalone_micropip, test_wheel_url): """ @run_in_pyodide() - async def run(selenium, pkg_name, wheel_url): + async def run(selenium, pkg_name, pkg_name_normalized, wheel_url): import importlib.metadata import micropip await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() dist = importlib.metadata.distribution(pkg_name) files = dist.files @@ -86,7 +92,12 @@ async def run(selenium, pkg_name, wheel_url): assert not dist._path.is_dir(), f"{dist._path} still exists after removal" - run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url) + run( + selenium_standalone_micropip, + TEST_PACKAGE_NAME, + TEST_PACKAGE_NAME_NORMALIZED, + test_wheel_url, + ) def test_install_again(selenium_standalone_micropip, test_wheel_url): @@ -95,20 +106,20 @@ def test_install_again(selenium_standalone_micropip, test_wheel_url): """ @run_in_pyodide() - async def run(selenium, pkg_name, wheel_url): + async def run(selenium, pkg_name, pkg_name_normalized, wheel_url): import sys import micropip await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() __import__(pkg_name) micropip.uninstall(pkg_name) - assert pkg_name not in micropip.list() + assert pkg_name_normalized not in micropip.list() del sys.modules[pkg_name] @@ -121,10 +132,15 @@ async def run(selenium, pkg_name, wheel_url): await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() __import__(pkg_name) - run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url) + run( + selenium_standalone_micropip, + TEST_PACKAGE_NAME, + TEST_PACKAGE_NAME_NORMALIZED, + test_wheel_url, + ) def test_warning_not_installed(selenium_standalone_micropip): @@ -156,7 +172,7 @@ def test_warning_file_removed(selenium_standalone_micropip, test_wheel_url): """ @run_in_pyodide() - async def run(selenium, pkg_name, wheel_url): + async def run(selenium, pkg_name, pkg_name_normalized, wheel_url): from importlib.metadata import distribution import micropip import contextlib @@ -165,9 +181,9 @@ async def run(selenium, pkg_name, wheel_url): with io.StringIO() as buf, contextlib.redirect_stdout(buf): await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() - dist = distribution(pkg_name) + dist = 
distribution(pkg_name_normalized) files = dist.files file1 = files[0] file2 = files[1] @@ -175,7 +191,7 @@ async def run(selenium, pkg_name, wheel_url): file1.locate().unlink() file2.locate().unlink() - micropip.uninstall(pkg_name) + micropip.uninstall(pkg_name_normalized) captured = buf.getvalue() logs = captured.strip().split("\n") @@ -184,7 +200,12 @@ async def run(selenium, pkg_name, wheel_url): assert "does not exist" in logs[-1] assert "does not exist" in logs[-2] - run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url) + run( + selenium_standalone_micropip, + TEST_PACKAGE_NAME, + TEST_PACKAGE_NAME_NORMALIZED, + test_wheel_url, + ) def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url): @@ -193,7 +214,7 @@ def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url): """ @run_in_pyodide() - async def run(selenium, pkg_name, wheel_url): + async def run(selenium, pkg_name, pkg_name_normalized, wheel_url): from importlib.metadata import distribution import micropip import contextlib @@ -201,12 +222,12 @@ async def run(selenium, pkg_name, wheel_url): with io.StringIO() as buf, contextlib.redirect_stdout(buf): await micropip.install(wheel_url) - assert pkg_name in micropip.list() + assert pkg_name_normalized in micropip.list() - pkg_dir = distribution(pkg_name)._path.parent / "deep" + pkg_dir = distribution(pkg_name_normalized)._path.parent / "deep" (pkg_dir / "extra-file.txt").touch() - micropip.uninstall(pkg_name) + micropip.uninstall(pkg_name_normalized) captured = buf.getvalue() logs = captured.strip().split("\n") @@ -214,7 +235,12 @@ async def run(selenium, pkg_name, wheel_url): assert len(logs) == 1 assert "is not empty after uninstallation" in logs[0] - run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url) + run( + selenium_standalone_micropip, + TEST_PACKAGE_NAME, + TEST_PACKAGE_NAME_NORMALIZED, + test_wheel_url, + ) def test_pyodide_repodata(selenium_standalone_micropip): diff --git a/tests/test_wheelinfo.py b/tests/test_wheelinfo.py index a5af701..8e027b5 100644 --- a/tests/test_wheelinfo.py +++ b/tests/test_wheelinfo.py @@ -1,5 +1,3 @@ -from io import BytesIO - import pytest from conftest import PYTEST_WHEEL, TEST_WHEEL_DIR @@ -13,7 +11,7 @@ def dummy_wheel(): @pytest.fixture def dummy_wheel_content(): - yield BytesIO((TEST_WHEEL_DIR / PYTEST_WHEEL).read_bytes()) + yield (TEST_WHEEL_DIR / PYTEST_WHEEL).read_bytes() @pytest.fixture @@ -56,25 +54,6 @@ def test_from_package_index(): assert wheel.sha256 == sha256 -def test_validate(dummy_wheel): - import hashlib - - dummy_wheel.sha256 = None - dummy_wheel._data = BytesIO(b"dummy-data") - - # Should succeed when sha256 is None - dummy_wheel._validate() - - # Should fail when checksum is different - dummy_wheel.sha256 = "dummy-sha256" - with pytest.raises(ValueError, match="Contents don't match hash"): - dummy_wheel._validate() - - # Should succeed when checksum is the same - dummy_wheel.sha256 = hashlib.sha256(b"dummy-data").hexdigest() - dummy_wheel._validate() - - def test_extract(dummy_wheel, dummy_wheel_content, tmp_path): dummy_wheel._data = dummy_wheel_content dummy_wheel._extract(tmp_path)
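
# --- Illustrative sketch only, not part of the patch above ---
# A minimal, self-contained example of how the bytes-based checksum helpers
# introduced in micropip/wheelinfo.py behave. `_generate_package_hash` and
# `_validate_sha256_checksum` mirror the functions added in the diff; the
# wheel payload below is a stand-in, and the surrounding WheelInfo
# download/extract machinery is omitted.
import hashlib


def _generate_package_hash(data: bytes) -> str:
    # Hash the whole in-memory payload at once; no stream seeking needed.
    return hashlib.sha256(data).hexdigest()


def _validate_sha256_checksum(data: bytes, expected: str | None = None) -> None:
    if expected is None:
        # No checksum available to verify against (e.g. a non-PyPI source).
        return
    actual = _generate_package_hash(data)
    if actual != expected:
        raise RuntimeError(f"Invalid checksum: expected {expected}, got {actual}")


wheel_bytes = b"fake wheel"
_validate_sha256_checksum(wheel_bytes, None)  # no checksum: validation is skipped
_validate_sha256_checksum(wheel_bytes, hashlib.sha256(wheel_bytes).hexdigest())  # matches: passes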