From e58e86c1253e3dfa3e60ac386f762f86a23b2e29 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 24 Dec 2024 07:43:35 -0600 Subject: [PATCH] Add support for relative urls (#610) Add `base_url` parameter to `url_parse()` and implement new `req_url_relative()`. Fixes #449 --- NAMESPACE | 1 + NEWS.md | 2 ++ R/req-url.R | 14 ++++++++++++++ R/url.R | 10 ++++++++-- man/req_url.Rd | 8 ++++++++ man/url_parse.Rd | 8 +++++++- tests/testthat/test-req-url.R | 6 ++++++ tests/testthat/test-url.R | 8 ++++++++ 8 files changed, 54 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0613c62a..9b303d2e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ export(req_url) export(req_url_path) export(req_url_path_append) export(req_url_query) +export(req_url_relative) export(req_user_agent) export(req_verbose) export(request) diff --git a/NEWS.md b/NEWS.md index f09f990e..58822493 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # httr2 (development version) +* New `req_url_relative()` for constructing relative urls (#449). +* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449). * `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). * `req_retry()` now defaults to `max_tries = 2` with a message. Set to `max_tries = 1` to disable retries. diff --git a/R/req-url.R b/R/req-url.R index a35b1489..cc317dbc 100644 --- a/R/req-url.R +++ b/R/req-url.R @@ -31,6 +31,11 @@ #' req |> #' req_url("http://google.com") #' +#' # Use a relative url +#' req <- request("http://example.com/a/b/c") +#' req |> req_url_relative("..") +#' req |> req_url_relative("/d/e/f") +#' #' # Use .multi to control what happens with vector parameters: #' req |> req_url_query(id = 100:105, .multi = "comma") #' req |> req_url_query(id = 100:105, .multi = "explode") @@ -47,6 +52,15 @@ req_url <- function(req, url) { req } +#' @export +#' @rdname req_url +req_url_relative <- function(req, url) { + check_request(req) + + new_url <- url_parse(url, base_url = req$url) + req_url(req, url_build(new_url)) +} + #' @export #' @rdname req_url #' @param .multi Controls what happens when an element of `...` is a vector diff --git a/R/url.R b/R/url.R index 518fe2d5..9b249fe0 100644 --- a/R/url.R +++ b/R/url.R @@ -6,6 +6,7 @@ #' #' @param url For `url_parse()` a string to parse into a URL; #' for `url_build()` a URL to turn back into a string. +#' @param base_url Use this as a parent, if `url` is a relative URL. #' @returns #' * `url_build()` returns a string. #' * `url_parse()` returns a URL: a S3 list with class `httr2_url` @@ -18,15 +19,20 @@ #' url_parse("http://google.com:80/?a=1&b=2") #' url_parse("http://username@google.com:80/path;test?a=1&b=2#40") #' +#' # You can parse a relative URL if you also provide a base url +#' url_parse("foo", "http://google.com/bar/") +#' url_parse("..", "http://google.com/bar/") +#' #' url <- url_parse("http://google.com/") #' url$port <- 80 #' url$hostname <- "example.com" #' url$query <- list(a = 1, b = 2, c = 3) #' url_build(url) -url_parse <- function(url) { +url_parse <- function(url, base_url = NULL) { check_string(url) + check_string(base_url, allow_null = TRUE) - curl <- curl::curl_parse_url(url) + curl <- curl::curl_parse_url(url, baseurl = base_url) parsed <- list( scheme = curl$scheme, diff --git a/man/req_url.Rd b/man/req_url.Rd index ca5b50b6..933e7a58 100644 --- a/man/req_url.Rd +++ b/man/req_url.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/req-url.R \name{req_url} \alias{req_url} +\alias{req_url_relative} \alias{req_url_query} \alias{req_url_path} \alias{req_url_path_append} @@ -9,6 +10,8 @@ \usage{ req_url(req, url) +req_url_relative(req, url) + req_url_query(.req, ..., .multi = c("error", "comma", "pipe", "explode")) req_url_path(req, ...) @@ -66,6 +69,11 @@ req |> req |> req_url("http://google.com") +# Use a relative url +req <- request("http://example.com/a/b/c") +req |> req_url_relative("..") +req |> req_url_relative("/d/e/f") + # Use .multi to control what happens with vector parameters: req |> req_url_query(id = 100:105, .multi = "comma") req |> req_url_query(id = 100:105, .multi = "explode") diff --git a/man/url_parse.Rd b/man/url_parse.Rd index 15a8d0aa..f36adeeb 100644 --- a/man/url_parse.Rd +++ b/man/url_parse.Rd @@ -5,13 +5,15 @@ \alias{url_build} \title{Parse and build URLs} \usage{ -url_parse(url) +url_parse(url, base_url = NULL) url_build(url) } \arguments{ \item{url}{For \code{url_parse()} a string to parse into a URL; for \code{url_build()} a URL to turn back into a string.} + +\item{base_url}{Use this as a parent, if \code{url} is a relative URL.} } \value{ \itemize{ @@ -32,6 +34,10 @@ url_parse("http://google.com:80/") url_parse("http://google.com:80/?a=1&b=2") url_parse("http://username@google.com:80/path;test?a=1&b=2#40") +# You can parse a relative URL if you also provide a base url +url_parse("foo", "http://google.com/bar/") +url_parse("..", "http://google.com/bar/") + url <- url_parse("http://google.com/") url$port <- 80 url$hostname <- "example.com" diff --git a/tests/testthat/test-req-url.R b/tests/testthat/test-req-url.R index aa55bcd2..a76ecfa8 100644 --- a/tests/testthat/test-req-url.R +++ b/tests/testthat/test-req-url.R @@ -96,6 +96,12 @@ test_that("can opt-out of query escaping", { expect_equal(req_url_query(req, a = I(","))$url, "http://example.com/?a=,") }) +test_that("can construct relative urls", { + req <- request("http://example.com/a/b/c.html") + expect_equal(req_url_relative(req, ".")$url, "http://example.com/a/b/") + expect_equal(req_url_relative(req, "..")$url, "http://example.com/a/") + expect_equal(req_url_relative(req, "/d/e/f")$url, "http://example.com/d/e/f") +}) # explode ----------------------------------------------------------------- test_that("explode handles expected inputs", { diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R index 885d08bc..7cae86e7 100644 --- a/tests/testthat/test-url.R +++ b/tests/testthat/test-url.R @@ -21,6 +21,14 @@ test_that("can round trip urls", { expect_equal(map(urls, ~ url_build(url_parse(.x))), urls) }) +test_that("can parse relative urls", { + base <- "http://example.com/a/b/c/" + expect_equal(url_parse("d", base)$path, "/a/b/c/d") + expect_equal(url_parse("..", base)$path, "/a/b/") + + expect_equal(url_parse("//archive.org", base)$scheme, "http") +}) + test_that("can print all url details", { expect_snapshot( url_parse("http://user:pass@example.com:80/path?a=1&b=2&c={1{2}3}#frag")