Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT Store data in bytes not io.BytesIO #91

Merged
merged 8 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions micropip/_compat_in_pyodide.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from io import BytesIO
from typing import IO
from pathlib import Path
from urllib.parse import urlparse

from pyodide._package_loader import get_dynlibs
Expand All @@ -20,15 +19,15 @@
# Otherwise, this is pytest test collection so let it go.


async def fetch_bytes(url: str, kwargs: dict[str, str]) -> IO[bytes]:
async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes:
parsed_url = urlparse(url)
if parsed_url.scheme == "emfs":
return open(parsed_url.path, "rb")
if parsed_url.scheme == "file":
result_bytes = Path(parsed_url.path).read_bytes()
elif parsed_url.scheme == "file":
result_bytes = (await loadBinaryFile(parsed_url.path)).to_bytes()
else:
result_bytes = await (await pyfetch(url, **kwargs)).bytes()
return BytesIO(result_bytes)
return result_bytes
Copy link
Member

@hoodmane hoodmane Oct 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't do anything with result_bytes anymore, maybe tidy as follows:

async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes:
    parsed_url = urlparse(url)
    if parsed_url.scheme == "emfs":
        return Path(parsed_url.path).read_bytes()
    if parsed_url.scheme == "file":
        return (await loadBinaryFile(parsed_url.path)).to_bytes()
    return await (await pyfetch(url, **kwargs)).bytes()



async def fetch_string_and_headers(
Expand Down
5 changes: 2 additions & 3 deletions micropip/_compat_not_in_pyodide.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from io import BytesIO
from pathlib import Path
from typing import IO, Any

Expand All @@ -20,9 +19,9 @@ def _fetch(url: str, kwargs: dict[str, Any]) -> addinfourl:
return urlopen(Request(url, **kwargs))


async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> IO[bytes]:
async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes:
response = _fetch(url, kwargs=kwargs)
return BytesIO(response.read())
return response.read()
Copy link
Member

@hoodmane hoodmane Oct 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe:

    return _fetch(url, kwargs=kwargs).read()



async def fetch_string_and_headers(
Expand Down
46 changes: 20 additions & 26 deletions micropip/wheelinfo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import asyncio
import hashlib
import io
import json
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import IO, Any
from typing import Any
from urllib.parse import ParseResult, urlparse

from packaging.requirements import Requirement
Expand Down Expand Up @@ -39,7 +40,7 @@ class WheelInfo:

# Fields below are only available after downloading the wheel, i.e. after calling `download()`.

_data: IO[bytes] | None = None # Wheel file contents.
_data: bytes | None = None # Wheel file contents.
_metadata: Metadata | None = None # Wheel metadata.
_requires: list[Requirement] | None = None # List of requirements.

Expand Down Expand Up @@ -109,7 +110,7 @@ async def install(self, target: Path) -> None:
raise RuntimeError(
"Micropip internal error: attempted to install wheel before downloading it?"
)
self._validate()
_validate_sha256_checksum(self._data, self.sha256)
self._extract(target)
await self._load_libraries(target)
self._set_installer()
Expand All @@ -119,7 +120,7 @@ async def download(self, fetch_kwargs: dict[str, Any]):
return

self._data = await self._fetch_bytes(fetch_kwargs)
with zipfile.ZipFile(self._data) as zf:
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO
self._metadata = Metadata(zipfile.Path(zf, metadata_path))

Expand Down Expand Up @@ -153,20 +154,9 @@ async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]):
"Check if the server is sending the correct 'Access-Control-Allow-Origin' header."
) from e

def _validate(self):
if self.sha256 is None:
# No checksums available, e.g. because installing
# from a different location than PyPI.
return

assert self._data
sha256_actual = _generate_package_hash(self._data)
if sha256_actual != self.sha256:
raise ValueError("Contents don't match hash")

def _extract(self, target: Path) -> None:
assert self._data
with zipfile.ZipFile(self._data) as zf:
with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
zf.extractall(target)
self._dist_info = target / wheel_dist_info_dir(zf, self.name)

Expand Down Expand Up @@ -198,16 +188,20 @@ async def _load_libraries(self, target: Path) -> None:
TODO: integrate with pyodide's dynamic library loading mechanism.
"""
assert self._data
dynlibs = get_dynlibs(self._data, ".whl", target)
dynlibs = get_dynlibs(io.BytesIO(self._data), ".whl", target)
await asyncio.gather(*map(lambda dynlib: loadDynlib(dynlib, False), dynlibs))


def _generate_package_hash(data: IO[bytes]) -> str:
"""
Generate a SHA256 hash of the package data.
"""
sha256_hash = hashlib.sha256()
data.seek(0)
while chunk := data.read(4096):
sha256_hash.update(chunk)
return sha256_hash.hexdigest()
def _validate_sha256_checksum(data: bytes, expected: str | None = None) -> None:
if expected is None:
# No checksums available, e.g. because installing
# from a different location than PyPI.
return

actual = _generate_package_hash(data)
if actual != expected:
raise RuntimeError(f"Invalid checksum: expected {expected}, got {actual}")


def _generate_package_hash(data: bytes) -> str:
return hashlib.sha256(data).hexdigest()
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def write_file(filename, contents):

tmp.seek(0)

return tmp
return tmp.read()


@pytest.fixture
Expand Down
4 changes: 1 addition & 3 deletions tests/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,6 @@ async def run_test(selenium, url, name, version):

@pytest.mark.asyncio
async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
from io import BytesIO

mock_server_fake_package = mock_package_index_json_api(
pkgs=["fake-pkg-micropip-test"]
)
Expand All @@ -381,7 +379,7 @@ async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
async def _mock_fetch_bytes(url, *args):
nonlocal _wheel_url
_wheel_url = url
return BytesIO(b"fake wheel")
return b"fake wheel"

from micropip import wheelinfo

Expand Down
23 changes: 1 addition & 22 deletions tests/test_wheelinfo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from io import BytesIO

import pytest
from conftest import PYTEST_WHEEL, TEST_WHEEL_DIR

Expand All @@ -13,7 +11,7 @@ def dummy_wheel():

@pytest.fixture
def dummy_wheel_content():
yield BytesIO((TEST_WHEEL_DIR / PYTEST_WHEEL).read_bytes())
yield (TEST_WHEEL_DIR / PYTEST_WHEEL).read_bytes()


@pytest.fixture
Expand Down Expand Up @@ -56,25 +54,6 @@ def test_from_package_index():
assert wheel.sha256 == sha256


def test_validate(dummy_wheel):
import hashlib

dummy_wheel.sha256 = None
dummy_wheel._data = BytesIO(b"dummy-data")

# Should succeed when sha256 is None
dummy_wheel._validate()

# Should fail when checksum is different
dummy_wheel.sha256 = "dummy-sha256"
with pytest.raises(ValueError, match="Contents don't match hash"):
dummy_wheel._validate()

# Should succeed when checksum is the same
dummy_wheel.sha256 = hashlib.sha256(b"dummy-data").hexdigest()
dummy_wheel._validate()


def test_extract(dummy_wheel, dummy_wheel_content, tmp_path):
dummy_wheel._data = dummy_wheel_content
dummy_wheel._extract(tmp_path)
Expand Down