Skip to content

Commit ed79b39

Browse files
committed
feat: use pypi_simple for PyPI simple API
The project https://pypi.org/project/pypi-simple/ provides an easy-to-use, convenient wrapper around PyPI's simple HTML and JSON APIs, as well as support for a number of packaging PEPs. Replace our custom `html5lib`- and `requests`-based index fetching and parsing code with pypi-simple. PyPI API calls now prefer the faster, more feature-rich JSON API and fall back to HTML for the local index and GitLab. Fixes: #741 Signed-off-by: Christian Heimes <[email protected]>
1 parent 34f78d4 commit ed79b39

File tree

4 files changed

+136
-93
lines changed

4 files changed

+136
-93
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ requires-python = ">=3.11"
3333
dependencies = [
3434
"click>=8.1.7",
3535
"elfdeps>=0.2.0",
36-
"html5lib",
3736
"packaging",
3837
"pkginfo",
3938
"psutil",
4039
"pydantic",
40+
"pypi_simple",
4141
"pyproject_hooks>=1.0.0,!=1.1.0",
4242
"PyYAML",
4343
"rich",

src/fromager/resolver.py

Lines changed: 116 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,14 @@
1616
from platform import python_version
1717
from urllib.parse import quote, unquote, urljoin, urlparse
1818

19-
import html5lib
19+
import pypi_simple
2020
import resolvelib
2121
from packaging.requirements import Requirement
2222
from packaging.specifiers import InvalidSpecifier, SpecifierSet
2323
from packaging.tags import Tag, sys_tags
2424
from packaging.utils import (
2525
BuildTag,
2626
canonicalize_name,
27-
parse_sdist_filename,
2827
parse_wheel_filename,
2928
)
3029
from packaging.version import Version
@@ -184,149 +183,179 @@ def get_project_from_pypi(
184183
"""Return candidates created from the project name and extras."""
185184
found_candidates: set[str] = set()
186185
ignored_candidates: set[str] = set()
187-
simple_index_url = sdist_server_url.rstrip("/") + "/" + project + "/"
188-
logger.debug("%s: getting available versions from %s", project, simple_index_url)
186+
logger.debug("%s: getting available versions from %s", project, sdist_server_url)
189187

188+
client = pypi_simple.PyPISimple(
189+
endpoint=sdist_server_url,
190+
session=session,
191+
accept=pypi_simple.ACCEPT_JSON_PREFERRED,
192+
)
190193
try:
191-
response = session.get(simple_index_url)
192-
response.raise_for_status()
193-
data = response.content
194+
package = client.get_project_page(project)
194195
except Exception as e:
195196
logger.debug(
196-
"%s: failed to fetch package index from %s: %s",
197-
project,
198-
simple_index_url,
197+
"failed to fetch package index from %s: %s",
198+
sdist_server_url,
199199
e,
200200
)
201201
raise
202202

203-
doc = html5lib.parse(data, namespaceHTMLElements=False)
204-
for i in doc.findall(".//a"):
205-
candidate_url = urljoin(simple_index_url, i.attrib["href"])
206-
py_req = i.attrib.get("data-requires-python")
207-
# PEP 658: Check for metadata availability (PEP 714 data-core-metadata first)
208-
dist_info_metadata = i.attrib.get("data-core-metadata") or i.attrib.get(
209-
"data-dist-info-metadata"
210-
)
211-
# PEP 592: Check if package was yanked
212-
reason_data_yanked = i.attrib.get("data-yanked")
213-
# file names are URL quoted, "1.0%2Blocal" -> "1.0+local"
214-
filename = extract_filename_from_url(candidate_url)
215-
found_candidates.add(filename)
203+
# PEP 792 package status
204+
match package.status:
205+
case None:
206+
logger.debug("no package status")
207+
case pypi_simple.ProjectStatus.ACTIVE:
208+
logger.debug("project %r is active: %s", project, package.status_reason)
209+
case pypi_simple.ProjectStatus.DEPRECATED | pypi_simple.ProjectStatus.ARCHIVED:
210+
logger.warning(
211+
"project %r is no longer active: %r: %s",
212+
project,
213+
package.status,
214+
package.status_reason,
215+
)
216+
case pypi_simple.ProjectStatus.QUARANTINED:
217+
raise ValueError(
218+
f"project {project!r} is quarantined: {package.status_reason}"
219+
)
220+
case _:
221+
logger.warning(
222+
"project %r has unknown status %r: %s",
223+
project,
224+
package.status,
225+
package.status_reason,
226+
)
227+
228+
for dp in package.packages:
229+
found_candidates.add(dp.filename)
216230
if DEBUG_RESOLVER:
217-
logger.debug("%s: candidate %r -> %r", project, candidate_url, filename)
231+
logger.debug("candidate %r -> %r==%r", dp.url, dp.filename, dp.version)
218232

219-
# PEP 592: Skip items that were yanked
220-
if reason_data_yanked is not None:
233+
if (
234+
dp.project is None
235+
or dp.version is None
236+
or dp.package_type is None
237+
or len(dp.project) != len(project)
238+
):
239+
# Legacy file names that pypi_simple does not understand,
240+
# pypi_simple sets one or all fields to None.
241+
#
242+
# Look for and ignore cases like `cffi-1.0.2-2.tar.gz` which
243+
# produces the name `cffi-1-0-2`. We can't just compare the
244+
# names directly because of case and punctuation changes in
245+
# making names canonical and the way requirements are
246+
# expressed and there seems to be *no* way of producing sdist
247+
# filenames consistently, so we compare the length for this
248+
# case.
221249
if DEBUG_RESOLVER:
222250
logger.debug(
223-
"%s: skipping %s because it was yanked (%s)",
224-
project,
225-
filename,
226-
reason_data_yanked if reason_data_yanked else "no reason found",
251+
"skipping %r because 'pypi_simple' could not parse it or it's an invalid name",
252+
dp.filename,
227253
)
228-
ignored_candidates.add(filename)
254+
ignored_candidates.add(dp.filename)
229255
continue
230256

231-
# Construct metadata URL if PEP 658 metadata is available
232-
metadata_url = None
233-
if dist_info_metadata:
234-
# PEP 658: metadata is available at {file_url}.metadata
235-
metadata_url = candidate_url + ".metadata"
257+
if dp.package_type not in {"sdist", "wheel"}:
236258
if DEBUG_RESOLVER:
237259
logger.debug(
238-
"%s: PEP 658 metadata available at %s", project, metadata_url
260+
"skipping %r because it's not an sdist or wheel, got %r",
261+
dp.filename,
262+
dp.package_type,
239263
)
264+
ignored_candidates.add(dp.filename)
265+
continue
266+
267+
# PEP 592: Skip items that were yanked
268+
if dp.is_yanked:
269+
if DEBUG_RESOLVER:
270+
logger.debug(
271+
"skipping %s because it was yanked (%s)",
272+
dp.filename,
273+
dp.yanked_reason,
274+
)
275+
ignored_candidates.add(dp.filename)
276+
continue
277+
240278
# Skip items that need a different Python version
241-
if py_req:
279+
if dp.requires_python:
242280
try:
243-
matched_py: bool = match_py_req(py_req)
281+
matched_py: bool = match_py_req(dp.requires_python)
244282
except InvalidSpecifier as err:
245283
# Ignore files with invalid python specifiers
246284
# e.g. shellingham has files with ">= '2.7'"
247285
if DEBUG_RESOLVER:
248286
logger.debug(
249-
f"{project}: skipping {filename} because of an invalid python version specifier {py_req}: {err}"
287+
"skipping %r because of an invalid python version specifier %r: %s",
288+
dp.filename,
289+
dp.requires_python,
290+
err,
250291
)
251-
ignored_candidates.add(filename)
292+
ignored_candidates.add(dp.filename)
252293
continue
253294
if not matched_py:
254295
if DEBUG_RESOLVER:
255296
logger.debug(
256-
f"{project}: skipping {filename} because of python version {py_req}"
297+
"skipping %r because of python version %r",
298+
dp.filename,
299+
dp.requires_python,
257300
)
258-
ignored_candidates.add(filename)
301+
ignored_candidates.add(dp.filename)
259302
continue
260303

261304
# TODO: Handle compatibility tags?
262305

263306
try:
264-
if filename.endswith(".tar.gz") or filename.endswith(".zip"):
307+
if dp.package_type == "sdist":
265308
is_sdist = True
266-
name, version = parse_sdist_filename(filename)
309+
name: str = dp.project
310+
version: Version = Version(dp.version)
267311
tags: frozenset[Tag] = frozenset()
268312
build_tag: BuildTag = ()
269313
else:
270314
is_sdist = False
271-
name, version, build_tag, tags = parse_wheel_filename(filename)
272-
if tags:
273-
# FIXME: This doesn't take into account precedence of
274-
# the supported tags for best fit.
275-
matching_tags = SUPPORTED_TAGS.intersection(tags)
276-
if not matching_tags and ignore_platform:
277-
if DEBUG_RESOLVER:
278-
logger.debug(f"{project}: ignoring platform for {filename}")
279-
ignore_platform_tags: frozenset[Tag] = frozenset(
280-
Tag(t.interpreter, t.abi, IGNORE_PLATFORM) for t in tags
281-
)
282-
matching_tags = SUPPORTED_TAGS_IGNORE_PLATFORM.intersection(
283-
ignore_platform_tags
284-
)
285-
if not matching_tags:
286-
if DEBUG_RESOLVER:
287-
logger.debug(f"{project}: ignoring {filename} with tags {tags}")
288-
ignored_candidates.add(filename)
289-
continue
315+
name, version, build_tag, tags = parse_wheel_filename(dp.filename)
290316
except Exception as err:
291317
# Ignore files with invalid versions
292318
if DEBUG_RESOLVER:
293-
logger.debug(
294-
f'{project}: could not determine version for "{filename}": {err}'
295-
)
296-
ignored_candidates.add(filename)
297-
continue
298-
# Look for and ignore cases like `cffi-1.0.2-2.tar.gz` which
299-
# produces the name `cffi-1-0-2`. We can't just compare the
300-
# names directly because of case and punctuation changes in
301-
# making names canonical and the way requirements are
302-
# expressed and there seems to be *no* way of producing sdist
303-
# filenames consistently, so we compare the length for this
304-
# case.
305-
if len(name) != len(project):
306-
if DEBUG_RESOLVER:
307-
logger.debug(f'{project}: skipping invalid filename "{filename}"')
308-
ignored_candidates.add(filename)
319+
logger.debug("could not determine version for %r: %s", dp.filename, err)
320+
ignored_candidates.add(dp.filename)
309321
continue
310322

323+
if tags:
324+
# FIXME: This doesn't take into account precedence of
325+
# the supported tags for best fit.
326+
matching_tags = SUPPORTED_TAGS.intersection(tags)
327+
if not matching_tags and ignore_platform:
328+
if DEBUG_RESOLVER:
329+
logger.debug("ignoring platform for %r", dp.filename)
330+
ignore_platform_tags: frozenset[Tag] = frozenset(
331+
Tag(t.interpreter, t.abi, IGNORE_PLATFORM) for t in tags
332+
)
333+
matching_tags = SUPPORTED_TAGS_IGNORE_PLATFORM.intersection(
334+
ignore_platform_tags
335+
)
336+
if not matching_tags:
337+
if DEBUG_RESOLVER:
338+
logger.debug("ignoring %r with tags %r", dp.filename, tags)
339+
ignored_candidates.add(dp.filename)
340+
continue
341+
311342
c = Candidate(
312343
name,
313344
version,
314-
url=candidate_url,
345+
url=dp.url,
315346
extras=extras,
316347
is_sdist=is_sdist,
317348
build_tag=build_tag,
318-
metadata_url=metadata_url,
349+
metadata_url=dp.metadata_url if dp.has_metadata else None,
319350
)
320351
if DEBUG_RESOLVER:
321-
logger.debug(
322-
"%s: candidate %s (%s) %s", project, filename, c, candidate_url
323-
)
352+
logger.debug("candidate %s (%s) %s", dp.filename, c, dp.url)
324353
yield c
325354

326355
if not found_candidates:
327-
logger.info(f"{project}: found no candidate files at {simple_index_url}")
356+
logger.info("found no candidate files at %s", sdist_server_url)
328357
elif ignored_candidates == found_candidates:
329-
logger.info(f"{project}: ignored all candidate files at {simple_index_url}")
358+
logger.info("ignored all candidate files at %s", sdist_server_url)
330359

331360

332361
RequirementsMap: typing.TypeAlias = typing.Mapping[str, typing.Iterable[Requirement]]

src/fromager/server.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import functools
44
import http.server
5+
import io
56
import logging
67
import os
78
import pathlib
@@ -23,6 +24,19 @@ class LoggingHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
2324
def log_message(self, format: str, *args: typing.Any) -> None:
2425
logger.debug(format, *args)
2526

27+
def list_directory(self, path: str | os.PathLike[str]) -> io.BytesIO | None:
28+
# default list_directory() function appends an "@" to every symbolic
29+
# link. pypi_simple does not understand the "@". Rewrite the body
30+
# while keeping the same content length.
31+
old: io.BytesIO | None = super().list_directory(path)
32+
if old is None:
33+
return None
34+
new = io.BytesIO()
35+
for oldline in old:
36+
new.write(oldline.replace(b"@</a>", b"</a> "))
37+
new.seek(0)
38+
return new
39+
2640

2741
def start_wheel_server(ctx: context.WorkContext) -> None:
2842
update_wheel_mirror(ctx)

tests/test_resolver.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def test_provider_choose_wheel():
6262
candidate = result.mapping["hydra-core"]
6363
assert (
6464
candidate.url
65-
== "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-2-py3-none-any.whl#sha256=fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"
65+
== "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-2-py3-none-any.whl"
6666
)
6767
assert str(candidate.version) == "1.3.2"
6868

@@ -196,7 +196,7 @@ def test_provider_choose_sdist():
196196
candidate = result.mapping["hydra-core"]
197197
assert (
198198
candidate.url
199-
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz#sha256=8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"
199+
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz"
200200
)
201201
assert str(candidate.version) == "1.3.2"
202202

@@ -222,9 +222,9 @@ def test_provider_choose_either_with_constraint():
222222
candidate = result.mapping["hydra-core"]
223223
assert (
224224
candidate.url
225-
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz#sha256=8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"
225+
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz"
226226
or candidate.url
227-
== "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-2-py3-none-any.whl#sha256=fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"
227+
== "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-2-py3-none-any.whl"
228228
)
229229

230230

@@ -264,7 +264,7 @@ def test_provider_constraint_match():
264264
candidate = result.mapping["hydra-core"]
265265
assert (
266266
candidate.url
267-
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.2.2.tar.gz#sha256=8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"
267+
== "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.2.2.tar.gz"
268268
)
269269
assert str(candidate.version) == "1.2.2"
270270

0 commit comments

Comments
 (0)