From 9db8f7e52bdc3c2c733d19cd7f7c42193ec93911 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Dec 2024 15:31:03 -0600 Subject: [PATCH] Switch curl for url parsing (#582) Fixes #577 --- NEWS.md | 1 + R/url.R | 50 ++++++---------------- tests/testthat/_snaps/curl.md | 31 ++++++++------ tests/testthat/test-oauth-flow-auth-code.R | 4 +- tests/testthat/test-url.R | 10 ----- 5 files changed, 33 insertions(+), 63 deletions(-) diff --git a/NEWS.md b/NEWS.md index 64c1e8e3..f09f990e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # httr2 (development version) +* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). * `req_retry()` now defaults to `max_tries = 2` with a message. Set to `max_tries = 1` to disable retries. diff --git a/R/url.R b/R/url.R index 24680aba..518fe2d5 100644 --- a/R/url.R +++ b/R/url.R @@ -26,44 +26,20 @@ url_parse <- function(url) { check_string(url) - # https://datatracker.ietf.org/doc/html/rfc3986#appendix-B - pieces <- parse_match(url, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?") - - scheme <- pieces[[2]] - authority <- pieces[[4]] - path <- pieces[[5]] - query <- pieces[[7]] - if (!is.null(query)) { - query <- query_parse(query) - } - fragment <- pieces[[9]] - - # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 - pieces <- parse_match(authority %||% "", "^(([^@]+)@)?([^:]+)?(:([^#]+))?") - - userinfo <- pieces[[2]] - if (!is.null(userinfo)) { - userinfo <- parse_in_half(userinfo, ":") - if (userinfo$right == "") { - userinfo$right <- NULL - } - } - hostname <- pieces[[3]] - port <- pieces[[5]] - - structure( - list( - scheme = scheme, - hostname = hostname, - username = userinfo$left, - password = userinfo$right, - port = port, - path = path, - query = query, - fragment = fragment - ), - class = "httr2_url" + curl <- curl::curl_parse_url(url) + + parsed <- list( + scheme = curl$scheme, + hostname = curl$host, + username = curl$user, + password = curl$password, + port = curl$port, + path = curl$path, + query = if (length(curl$params)) as.list(curl$params), + fragment = curl$fragment ) + class(parsed) <- "httr2_url" + parsed } url_modify <- function(url, ..., error_call = caller_env()) { diff --git a/tests/testthat/_snaps/curl.md b/tests/testthat/_snaps/curl.md index 3d58afc5..5d42dfa3 100644 --- a/tests/testthat/_snaps/curl.md +++ b/tests/testthat/_snaps/curl.md @@ -40,18 +40,18 @@ Code curl_translate("curl http://x.com") Output - request("http://x.com") |> + request("http://x.com/") |> req_perform() Code curl_translate("curl http://x.com -X DELETE") Output - request("http://x.com") |> + request("http://x.com/") |> req_method("DELETE") |> req_perform() Code curl_translate("curl http://x.com -H A:1") Output - request("http://x.com") |> + request("http://x.com/") |> req_headers( A = "1", ) |> @@ -59,7 +59,7 @@ Code curl_translate("curl http://x.com -H 'A B:1'") Output - request("http://x.com") |> + request("http://x.com/") |> req_headers( `A B` = "1", ) |> @@ -67,13 +67,13 @@ Code curl_translate("curl http://x.com -u u:p") Output - request("http://x.com") |> + request("http://x.com/") |> req_auth_basic("u", "p") |> req_perform() Code curl_translate("curl http://x.com --verbose") Output - request("http://x.com") |> + request("http://x.com/") |> req_perform(verbosity = 1) # can translate query @@ -81,7 +81,7 @@ Code curl_translate("curl http://x.com?string=abcde&b=2") Output - request("http://x.com") |> + request("http://x.com/") |> req_url_query( string = "abcde", b = "2", @@ -93,14 +93,14 @@ Code curl_translate("curl http://example.com --data abcdef") Output - request("http://example.com") |> + request("http://example.com/") |> req_body_raw("abcdef", "application/x-www-form-urlencoded") |> req_perform() Code curl_translate( "curl http://example.com --data abcdef -H Content-Type:text/plain") Output - request("http://example.com") |> + request("http://example.com/") |> req_body_raw("abcdef", "text/plain") |> req_perform() @@ -111,7 +111,7 @@ Message v Copying to clipboard: Output - request("http://example.com") |> + request("http://example.com/") |> req_headers( A = "1", B = "2", @@ -120,16 +120,19 @@ Code clipr::read_clip() Output - [1] "request(\"http://example.com\") |> " " req_headers(" - [3] " A = \"1\"," " B = \"2\"," - [5] " ) |> " " req_perform()" + [1] "request(\"http://example.com/\") |> " + [2] " req_headers(" + [3] " A = \"1\"," + [4] " B = \"2\"," + [5] " ) |> " + [6] " req_perform()" # encode_string2() produces simple strings Code curl_translate(cmd) Output - request("http://example.com") |> + request("http://example.com/") |> req_method("PATCH") |> req_body_raw('{"data":{"x":1,"y":"a","nested":{"z":[1,2,3]}}}', "application/json") |> req_perform() diff --git a/tests/testthat/test-oauth-flow-auth-code.R b/tests/testthat/test-oauth-flow-auth-code.R index 72cd8e6f..9d0da0b1 100644 --- a/tests/testthat/test-oauth-flow-auth-code.R +++ b/tests/testthat/test-oauth-flow-auth-code.R @@ -75,12 +75,12 @@ test_that("old args are deprecated", { expect_snapshot( redirect <- normalize_redirect_uri("http://localhost", port = 1234) ) - expect_equal(redirect$uri, "http://localhost:1234") + expect_equal(redirect$uri, "http://localhost:1234/") expect_snapshot( redirect <- normalize_redirect_uri("http://x.com", host_name = "y.com") ) - expect_equal(redirect$uri, "http://y.com") + expect_equal(redirect$uri, "http://y.com/") expect_snapshot( redirect <- normalize_redirect_uri("http://x.com", host_ip = "y.com") diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R index 9dc3de0e..885d08bc 100644 --- a/tests/testthat/test-url.R +++ b/tests/testthat/test-url.R @@ -1,21 +1,11 @@ test_that("can parse special cases", { - url <- url_parse("//google.com") - expect_equal(url$scheme, NULL) - expect_equal(url$hostname, "google.com") - url <- url_parse("file:///tmp") expect_equal(url$scheme, "file") expect_equal(url$path, "/tmp") - - url <- url_parse("/") - expect_equal(url$scheme, NULL) - expect_equal(url$path, "/") }) test_that("can round trip urls", { urls <- list( - "/", - "//google.com", "file:///", "http://google.com/", "http://google.com/path",