From c4743700649bc5cf47492213eb8390ae57a4374a Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 16 Feb 2024 01:42:21 -0500 Subject: [PATCH] Allow empty fragments in HTML parser (#1443) ## Summary It looks like `devpi` might add an empty fragment (`#`) at the end of the URL. We expect it to contain the hash; this just makes empty-fragment map to "no hash". Closes https://github.com/astral-sh/uv/issues/1441. --- crates/uv-client/src/html.rs | 62 ++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 397cc014281d..bfd432337b73 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -108,10 +108,17 @@ impl SimpleHtml { .ok_or(Error::MissingHref)?; let href = std::str::from_utf8(href.as_bytes())?; + // Extract the hash, which should be in the fragment. let decoded = html_escape::decode_html_entities(href); let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') { - // Extract the hash, which should be in the fragment. - (path, Self::parse_hash(fragment)?) + ( + path, + if fragment.trim().is_empty() { + Hashes::default() + } else { + Self::parse_hash(fragment)? + }, + ) } else { (href, Hashes::default()) }; @@ -455,6 +462,57 @@ mod tests { insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link"); } + #[test] + fn parse_empty_fragment() { + let text = r#" + + + +

<h1>Links for jinja2</h1>

+    <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#">Jinja2-3.1.2-py3-none-any.whl</a><br/>
+ + + + "#; + let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); + let result = SimpleHtml::parse(text, &base).unwrap(); + insta::assert_debug_snapshot!(result, @r###" + SimpleHtml { + base: BaseUrl( + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), + ), + port: None, + path: "/whl/jinja2/", + query: None, + fragment: None, + }, + ), + files: [ + File { + dist_info_metadata: None, + filename: "Jinja2-3.1.2-py3-none-any.whl", + hashes: Hashes { + sha256: None, + }, + requires_python: None, + size: None, + upload_time: None, + url: "/whl/Jinja2-3.1.2-py3-none-any.whl#", + yanked: None, + }, + ], + } + "###); + } + #[test] fn parse_missing_hash_value() { let text = r#"