Skip to content

Commit

Permalink
More tools for manipulating urls: relative + query params (#614)
Browse files Browse the repository at this point in the history
Fixes #425
  • Loading branch information
hadley authored Jan 6, 2025
1 parent 16a1048 commit f9bc4c9
Show file tree
Hide file tree
Showing 15 changed files with 363 additions and 99 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ export(signal_total_pages)
export(throttle_status)
export(url_build)
export(url_modify)
export(url_modify_query)
export(url_modify_relative)
export(url_parse)
export(url_query_build)
export(url_query_parse)
export(with_mock)
export(with_mocked_responses)
export(with_verbosity)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# httr2 (development version)

* New `url_modify()`, `url_modify_query()`, and `url_modify_relative()` make it easier to modify an existing url (#464).
* New `url_query_parse()` and `url_query_build()` allow you to parse and build a query string (#425).
* `req_url_query()` gains the ability to control how spaces are encoded (#432).
* New `resp_request()` aids debugging by returning the request associated with a response (#604).
* `print.request()` now correctly escapes `{}` in headers (#586).
* New `req_headers_redacted()` provides a user-friendlier way to set redacted headers (#561).
* `resp_link_url()` now works if there are multiple `Link` headers (#587).
* New `url_modify()` makes it easier to modify an existing url (#464).
* New `req_url_relative()` for constructing relative urls (#449).
* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449).
* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).
Expand Down
2 changes: 1 addition & 1 deletion R/oauth-flow-auth-code.R
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ oauth_flow_auth_code_listen <- function(redirect_uri = "http://localhost:1410")
# https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1
# Spaces are first replaced by +
parse_form_urlencoded <- function(query) {
query <- query_parse(query)
query <- url_query_parse(query)
query[] <- gsub("+", " ", query, fixed = TRUE)
query
}
Expand Down
2 changes: 1 addition & 1 deletion R/req-auth-aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ aws_v4_signature <- function(method,
CanonicalQueryString <- ""
} else {
sorted_query <- url$query[order(names(url$query))]
CanonicalQueryString <- query_build(CanonicalQueryString)
# Build the canonical query string from the parameters sorted by name above.
CanonicalQueryString <- url_query_build(sorted_query)
}

headers$host <- url$hostname
Expand Down
2 changes: 1 addition & 1 deletion R/req-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ req_body_get <- function(req) {
raw = req$body$data,
form = {
data <- unobfuscate(req$body$data)
query_build(data)
url_query_build(data)
},
json = exec(jsonlite::toJSON, req$body$data, !!!req$body$params),
cli::cli_abort("Unsupported request body type {.str {req$body$type}}.")
Expand Down
33 changes: 8 additions & 25 deletions R/req-url.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#' * `req_url_path()` modifies the path
#' * `req_url_path_append()` adds to the path
#'
#' Alternatively, to modify only a URL without creating a request,
#' you can instead use [url_modify()] and friends.
#'
#' @inheritParams req_perform
#' @param url New URL; completely replaces existing.
#' @param ... For `req_url_query()`: <[`dynamic-dots`][rlang::dyn-dots]>
Expand Down Expand Up @@ -56,39 +59,19 @@ req_url <- function(req, url) {
#' @rdname req_url
req_url_relative <- function(req, url) {
check_request(req)

new_url <- url_parse(url, base_url = req$url)
req_url(req, url_build(new_url))
req_url(req, url_modify_relative(req$url, url))
}

#' @export
#' @rdname req_url
#' @param .multi Controls what happens when an element of `...` is a vector
#' containing multiple values:
#'
#' * `"error"`, the default, throws an error.
#' * `"comma"`, separates values with a `,`, e.g. `?x=1,2`.
#' * `"pipe"`, separates values with a `|`, e.g. `?x=1|2`.
#' * `"explode"`, turns each element into its own parameter, e.g. `?x=1&x=2`
#'
#' If none of these options work for your needs, you can instead supply a
#' function that takes a character vector of argument values and returns
#' a single string.
#' @param .space How should spaces in query params be escaped? The default,
#' "percent", uses standard percent encoding (i.e. `%20`), but you can opt-in
#' to "form" encoding, which uses `+` instead.
#' @inheritParams url_modify_query
req_url_query <- function(.req,
...,
.multi = c("error", "comma", "pipe", "explode"),
.space = c("percent", "form")
) {
.space = c("percent", "form")) {
check_request(.req)

dots <- multi_dots(..., .multi = .multi, .space = .space)

url <- url_parse(.req$url)
url$query <- modify_list(url$query, !!!dots)
req_url(.req, url_build(url))
url <- url_modify_query(.req$url, ..., .multi = .multi, .space = .space)
req_url(.req, url)
}

#' @export
Expand Down
166 changes: 132 additions & 34 deletions R/url.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,15 @@ url_parse <- function(url, base_url = NULL) {

#' Modify a URL
#'
#' Modify components of a URL. The default value of each argument, `NULL`,
#' means leave the component as is. If you want to remove a component,
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
#' create a relative URL.
#' @description
#' Use `url_modify()` to modify any component of the URL,
#' `url_modify_relative()` to modify with a relative URL,
#' or `url_modify_query()` to modify individual query parameters.
#'
#' For `url_modify()`, components that aren't specified in the
#' function call will be left as is; components set to `NULL` will be removed,
#' and all other values will be updated. Note that removing `scheme` or
#' `hostname` will create a relative URL.
#'
#' @param url A string or [parsed URL][url_parse].
#' @param scheme The scheme, typically either `http` or `https`.
Expand All @@ -67,16 +72,23 @@ url_parse <- function(url, base_url = NULL) {
#' url_modify("http://hadley.nz/abc", path = "")
#' url_modify("http://hadley.nz?a=1", query = "b=2")
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
#'
#' url_modify_query("http://hadley.nz?a=1&b=2", c = 3)
#' url_modify_query("http://hadley.nz?a=1&b=2", b = NULL)
#' url_modify_query("http://hadley.nz?a=1&b=2", a = 100)
#'
#' url_modify_relative("http://hadley.nz/a/b/c.html", "/d.html")
#' url_modify_relative("http://hadley.nz/a/b/c.html", "d.html")
#' url_modify_relative("http://hadley.nz/a/b/c.html", "../d.html")
url_modify <- function(url,
scheme = NULL,
hostname = NULL,
username = NULL,
password = NULL,
port = NULL,
path = NULL,
query = NULL,
fragment = NULL) {

scheme = as_is,
hostname = as_is,
username = as_is,
password = as_is,
port = as_is,
path = as_is,
query = as_is,
fragment = as_is) {
if (!is_string(url) && !is_url(url)) {
stop_input_type(url, "a string or parsed URL")
}
Expand All @@ -85,25 +97,25 @@ url_modify <- function(url,
url <- url_parse(url)
}

check_string(scheme, allow_null = TRUE)
check_string(hostname, allow_null = TRUE)
check_string(username, allow_null = TRUE)
check_string(password, allow_null = TRUE)
check_number_whole(port, min = 1, allow_null = TRUE)
check_string(path, allow_null = TRUE)
check_string(fragment, allow_null = TRUE)
if (!leave_as_is(scheme)) check_string(scheme, allow_null = TRUE)
if (!leave_as_is(hostname)) check_string(hostname, allow_null = TRUE)
if (!leave_as_is(username)) check_string(username, allow_null = TRUE)
if (!leave_as_is(password)) check_string(password, allow_null = TRUE)
if (!leave_as_is(port)) check_number_whole(port, min = 1, allow_null = TRUE)
if (!leave_as_is(path)) check_string(path, allow_null = TRUE)
if (!leave_as_is(fragment)) check_string(fragment, allow_null = TRUE)

if (is_string(query)) {
query <- query_parse(query)
} else if (is.list(query) && (is_named(query) || length(query) == 0)) {
query <- url_query_parse(query)
} else if (is_named_list(query)) {
for (nm in names(query)) {
check_query_param(query[[nm]], paste0("query$", nm))
}
} else if (!is.null(query)) {
} else if (!is.null(query) && !leave_as_is(query)) {
stop_input_type(query, "a character vector, named list, or NULL")
}

new <- compact(list(
new <- list(
scheme = scheme,
hostname = hostname,
username = username,
Expand All @@ -112,9 +124,8 @@ url_modify <- function(url,
path = path,
query = query,
fragment = fragment
))
is_empty <- map_lgl(new, identical, "")
new[is_empty] <- list(NULL)
)
new <- new[!map_lgl(new, leave_as_is)]
url[names(new)] <- new

if (string_url) {
Expand All @@ -124,6 +135,71 @@ url_modify <- function(url,
}
}

# Sentinel used as the default for `url_modify()` arguments: a bare symbol,
# so that "argument not supplied" can be told apart from an explicit `NULL`.
as_is <- as.symbol("as_is")

# Is `x` the `as_is` sentinel?
leave_as_is <- function(x) {
  identical(x, as_is)
}

#' @export
#' @rdname url_modify
#' @param relative_url A relative URL that is resolved against `url`,
#'   which acts as the base URL.
url_modify_relative <- function(url, relative_url) {
  # Validate up front, matching `url_modify()` and `url_modify_query()`.
  if (!is_string(url) && !is_url(url)) {
    stop_input_type(url, "a string or parsed URL")
  }

  # A parsed URL is rebuilt into a string before being used as the base.
  string_url <- is_string(url)
  base_url <- if (string_url) url else url_build(url)

  new_url <- url_parse(relative_url, base_url = base_url)

  # Return the same kind of object that the caller supplied.
  if (string_url) {
    url_build(new_url)
  } else {
    new_url
  }
}

#' @export
#' @rdname url_modify
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]>
#'   Name-value pairs that define query parameters. Each value must be either
#'   an atomic vector or `NULL` (which removes the corresponding parameter).
#'   If you want to opt out of escaping, wrap strings in `I()`.
#' @param .multi Controls what happens when a value is a vector containing
#'   multiple values:
#'
#'   * `"error"`, the default, throws an error.
#'   * `"comma"`, separates values with a `,`, e.g. `?x=1,2`.
#'   * `"pipe"`, separates values with a `|`, e.g. `?x=1|2`.
#'   * `"explode"`, turns each element into its own parameter, e.g. `?x=1&x=2`.
#'
#'   If none of these options work for your needs, you can instead supply a
#'   function that takes a character vector of argument values and returns
#'   a single string.
#' @param .space How should spaces in query params be escaped? The default,
#'   `"percent"`, uses standard percent encoding (i.e. `%20`), but you can
#'   opt in to `"form"` encoding, which uses `+` instead.
url_modify_query <- function(
    url,
    ...,
    .multi = c("error", "comma", "pipe", "explode"),
    .space = c("percent", "form")) {
  # Accept either a URL string or an already-parsed URL; reject anything else.
  was_string <- is_string(url)
  if (!was_string && !is_url(url)) {
    stop_input_type(url, "a string or parsed URL")
  }
  parsed <- if (was_string) url_parse(url) else url

  # Only touch the query component when there is something to change.
  updates <- multi_dots(..., .multi = .multi, .space = .space)
  if (length(updates) > 0) {
    parsed$query <- modify_list(parsed$query, !!!updates)
  }

  # Hand back the same kind of object that was passed in.
  if (was_string) url_build(parsed) else parsed
}

# Test whether `x` is a parsed URL, i.e. carries the "httr2_url" class.
is_url <- function(x) {
  inherits(x, "httr2_url")
}

#' @export
Expand Down Expand Up @@ -175,7 +251,7 @@ url_build <- function(url) {
}

if (!is.null(url$query)) {
query <- query_build(url$query)
query <- url_query_build(url$query)
} else {
query <- NULL
}
Expand Down Expand Up @@ -213,9 +289,23 @@ url_build <- function(url) {
)
}

query_parse <- function(x) {
x <- gsub("^\\?", "", x) # strip leading ?, if present
params <- parse_name_equals_value(parse_delim(x, "&"))
#' Parse query parameters and/or build a string
#'
#' `url_query_parse()` parses a query string into a named list;
#' `url_query_build()` builds a query string from a named list.
#'
#' @param query A string, when parsing; a named list when building.
#' @export
#' @examples
#' str(url_query_parse("a=1&b=2"))
#'
#' url_query_build(list(x = 1, y = "z"))
#' url_query_build(list(x = 1, y = 1:2), .multi = "explode")
url_query_parse <- function(query) {
check_string(query)

query <- gsub("^\\?", "", query) # strip leading ?, if present
params <- parse_name_equals_value(parse_delim(query, "&"))

if (length(params) == 0) {
return(NULL)
Expand All @@ -226,12 +316,20 @@ query_parse <- function(x) {
out
}

query_build <- function(x, error_call = caller_env()) {
elements_build(x, "Query", "&", error_call = error_call)
#' @export
#' @rdname url_query_parse
#' @inheritParams url_modify_query
url_query_build <- function(query, .multi = c("error", "comma", "pipe", "explode")) {
if (!is_named_list(query)) {
stop_input_type(query, "a named list")
}

query <- multi_dots(!!!query, .multi = .multi, error_arg = "query")
elements_build(query, "Query", "&")
}

elements_build <- function(x, name, collapse, error_call = caller_env()) {
if (!is_list(x) || (!is_named(x) && length(x) > 0)) {
if (!is_named_list(x)) {
cli::cli_abort("{name} must be a named list.", call = error_call)
}

Expand Down
6 changes: 6 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ modify_list <- function(.x, ..., error_call = caller_env()) {
)
}



out <- .x[!names(.x) %in% names(dots)]
out <- c(out, compact(dots))

Expand Down Expand Up @@ -326,3 +328,7 @@ slice <- function(vector, start = 1, end = length(vector) + 1) {
vector[start:(end - 1)]
}
}

# Is `x` a list that is either empty or has every element named?
is_named_list <- function(x) {
  if (!is_list(x)) {
    return(FALSE)
  }
  length(x) == 0 || is_named(x)
}
3 changes: 1 addition & 2 deletions man/req_body.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/req_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f9bc4c9

Please sign in to comment.