Skip to content

Commit

Permalink
Improve alternative index handling.
Browse files Browse the repository at this point in the history
This improve the handling of indexes and error messages.

Before this PR all errors reaching an index would be ValueError, thus
not finding a wheel was the same as the index page having a non-handled
content type, or even failing to parse the html because of a typo.

This made it hard to debug, or even realize that an index url was wrong,
or that the pages served by the index were incorrect.

I thus introduce 3 New Errors that do not inherit from `ValueError`:
 - IndexMetadataFetchError
 - UnsupportedParserContentTypeError
 - WheelNotFoundError

The first two of which trigger real error, as they likely suggest a
problem with the index or the user config, and should not be ignored.

In addition the `verbose=` option was unconditionally setting the log
level. I think this is improper as if the logging level is set somewhere
else (to DEBUG, or something else), then it is overwritten.

- This then adds the option to pass `verbose=None`, (and make it the
  default), in which case it will not change the default.

python -m http.server will serve ContentType with `; charset=utf-8`,
which is not recognized.

- This now handle the case where the index ContentType header
  contains parameters for example `; charset=utf-8`, by discarding
  everything after the semicolon `;`; this is not proper parsing, but
  should be sufficient here.

 - I found that more debug logging would be useful and added a number
   of debug logs calls

With this I appear to get proper error message and debug statements
when trying to work on the Cors proxy.

Should close #123, and #121, and help with pyodide/pyodide#4898
  • Loading branch information
Carreau committed Jul 21, 2024
1 parent 5d4a869 commit fe7a0a0
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 21 deletions.
2 changes: 1 addition & 1 deletion micropip/_commands/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def install(
pre: bool = False,
index_urls: list[str] | str | None = None,
*,
verbose: bool | int = False,
verbose: bool | int | None = None,
) -> None:
if index_urls is None:
index_urls = package_index.INDEX_URLS[:]
Expand Down
2 changes: 1 addition & 1 deletion micropip/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def install(
pre: bool = False,
index_urls: list[str] | str | None = None,
*,
verbose: bool | int = False,
verbose: bool | int | None = None,
) -> None:
"""Install the given package and all of its dependencies.
Expand Down
20 changes: 9 additions & 11 deletions micropip/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,15 @@ def _set_formatter_once() -> None:
_logger.addHandler(ch)


def setup_logging(verbosity: int | bool) -> logging.Logger:
def setup_logging(verbosity: int | bool | None) -> logging.Logger:
_set_formatter_once()

if verbosity >= 2:
level_number = logging.DEBUG
elif verbosity == 1: # True == 1
level_number = logging.INFO
else:
level_number = logging.WARNING

assert _logger
_logger.setLevel(level_number)

if verbosity is not None:
if verbosity >= 2:
level_number = logging.DEBUG
elif verbosity == 1: # True == 1
level_number = logging.INFO
else:
level_number = logging.WARNING
_logger.setLevel(level_number)
return _logger
53 changes: 49 additions & 4 deletions micropip/package_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import string
import sys
from collections import defaultdict
Expand All @@ -20,6 +21,8 @@

_formatter = string.Formatter()

logger = logging.getLogger("micropip")


@dataclass
class ProjectInfo:
Expand Down Expand Up @@ -83,6 +86,7 @@ def from_simple_html_api(data: str, pkgname: str) -> "ProjectInfo":
https://peps.python.org/pep-0503
"""
logger.debug("Parsing simple_html_api for package: %r", pkgname)
project_detail = from_project_details_html(data, pkgname)
name, releases = ProjectInfo._parse_pep691_response(project_detail) # type: ignore[arg-type]
return ProjectInfo._compatible_only(name, releases)
Expand Down Expand Up @@ -123,6 +127,7 @@ def _parse_pep691_response(

releases[version].append(file)

logger.debug("Parsing pep691: %r, %r", name, releases)
return name, releases

@classmethod
Expand Down Expand Up @@ -215,19 +220,53 @@ def _contain_placeholder(url: str, placeholder: str = "package_name") -> bool:
return placeholder in fields


class UnsupportedParserContentTypeError(BaseException):
"""
Specific Error when trying to parse an PyPI Index.
.. note:
This used to be a ValueError, but cannot be a subclass of it, otherwise
we cannot determine whether we fail to parse the index, or just did not
found the wheels
"""

pass


def _select_parser(content_type: str, pkgname: str) -> Callable[[str], ProjectInfo]:
"""
Select the function to parse the response based on the content type.
"""
# This is not proper parsing of the content type, but to do so would require using
# either an external dependency like request, cgi (deprecated as of Python 3.13).
# we'll just drop all the parameters after the first ; as we want just the content-type:
# https://www.ietf.org/rfc/rfc2045.html#page-10
raw_content_type = content_type
if ";" in content_type:
content_type = content_type.split(";")[0].strip()

match content_type:
case "application/vnd.pypi.simple.v1+json":
logger.debug("Found parser for content type : %r", content_type)
return ProjectInfo.from_simple_json_api
case "application/json":
logger.debug("Found parser for content type : %r", content_type)
return ProjectInfo.from_json_api
case "application/vnd.pypi.simple.v1+html" | "text/html":
logger.debug("Found parser for content type : %r", content_type)
return partial(ProjectInfo.from_simple_html_api, pkgname=pkgname)
case _:
raise ValueError(f"Unsupported content type: {content_type}")
logger.debug("Unsupported parser content type : %r", content_type)
raise UnsupportedParserContentTypeError(
f"Unsupported content type: {raw_content_type}"
)


class IndexMetadataFetchError(BaseException):
pass


async def query_package(
Expand Down Expand Up @@ -264,6 +303,7 @@ async def query_package(
)

if index_urls is None:
logger.debug("No index url provided, falling back to %r", INDEX_URLS)
index_urls = INDEX_URLS
elif isinstance(index_urls, str):
index_urls = [index_urls]
Expand All @@ -273,17 +313,22 @@ async def query_package(
url = url.format(package_name=name)
else:
url = f"{url}/{name}/"
logger.debug("Searching index url %r", url)

try:
metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs)
except OSError:
except OSError as e:
logger.debug("Error fetching %r, skipping. Error was %r", url, e)
continue

content_type = headers.get("content-type", "").lower()
parser = _select_parser(content_type, name)
return parser(metadata)
try:
return parser(metadata)
except ValueError as e:
raise IndexMetadataFetchError("Error parsing Index page") from e
else:
raise ValueError(
raise IndexMetadataFetchError(
f"Can't fetch metadata for '{name}'. "
"Please make sure you have entered a correct package name "
"and correctly specified index_urls (if you changed them)."
Expand Down
17 changes: 13 additions & 4 deletions micropip/transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Transaction:
pyodide_packages: list[PackageMetadata] = field(default_factory=list)
failed: list[Requirement] = field(default_factory=list)

verbose: bool | int = False
verbose: bool | int | None = None

def __post_init__(self):
# If index_urls is None, pyodide-lock.json have to be searched first.
Expand Down Expand Up @@ -152,10 +152,15 @@ def eval_marker(e: dict[str, str]) -> bool:
else:
try:
await self._add_requirement_from_package_index(req)
except ValueError:
except WheelNotFoundError:
# If the requirement is not found in package index,
# we still have a chance to find it from pyodide lockfile.
if not self._add_requirement_from_pyodide_lock(req):
if self._add_requirement_from_pyodide_lock(req):
logger.debug(
"No wheel found for %r in index, falling back to pyodide lock file.",
req,
)
else:
raise
except ValueError:
self.failed.append(req)
Expand Down Expand Up @@ -238,6 +243,10 @@ async def add_wheel(
self.wheels.append(wheel)


class WheelNotFoundError(BaseException):
pass


def find_wheel(metadata: ProjectInfo, req: Requirement) -> WheelInfo:
"""Parse metadata to find the latest version of pure python wheel.
Parameters
Expand Down Expand Up @@ -278,7 +287,7 @@ def find_wheel(metadata: ProjectInfo, req: Requirement) -> WheelInfo:
if best_wheel is not None:
return wheel

raise ValueError(
raise WheelNotFoundError(
f"Can't find a pure Python 3 wheel for '{req}'.\n"
f"See: {FAQ_URLS['cant_find_wheel']}\n"
"You can use `await micropip.install(..., keep_going=True)` "
Expand Down

0 comments on commit fe7a0a0

Please sign in to comment.