Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions src/httpfs_curl_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ class HTTPFSCurlClient : public HTTPClient {
DestroyCurlGlobal();
}

// Returns a copy of `url` in which every literal space character (' ') is
// replaced by the percent-escape "%20". All other characters, including any
// percent-escapes already present, are copied through unchanged.
static string EncodeSpaces(const string &url) {
	string out;
	// Lower bound only: each space adds two extra characters to the result.
	out.reserve(url.size());
	for (const char c : url) {
		if (c == ' ') {
			out += "%20";
		} else {
			// Append the character directly; no temporary string per character.
			out += c;
		}
	}
	return out;
}

unique_ptr<HTTPResponse> Get(GetRequestInfo &info) override {
if (state) {
state->get_count++;
Expand All @@ -197,7 +206,8 @@ class HTTPFSCurlClient : public HTTPClient {
{
// If the same handle served a HEAD request, we must set NOBODY back to 0L to request content again
curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L);
curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
auto encoded_url = EncodeSpaces(request_info->url);
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr);
res = curl->Execute();
}
Expand Down Expand Up @@ -242,7 +252,8 @@ class HTTPFSCurlClient : public HTTPClient {

CURLcode res;
{
curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
auto encoded_url = EncodeSpaces(request_info->url);
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
// Perform PUT
curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT");
// Include PUT body
Expand Down Expand Up @@ -276,7 +287,8 @@ class HTTPFSCurlClient : public HTTPClient {
CURLcode res;
{
// Set URL
curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
auto encoded_url = EncodeSpaces(request_info->url);
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());

// Perform HEAD request instead of GET
curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L);
Expand Down Expand Up @@ -309,7 +321,8 @@ class HTTPFSCurlClient : public HTTPClient {
CURLcode res;
{
// Set URL
curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
auto encoded_url = EncodeSpaces(request_info->url);
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());

// Set DELETE request method
curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE");
Expand Down Expand Up @@ -347,7 +360,8 @@ class HTTPFSCurlClient : public HTTPClient {

CURLcode res;
{
curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
auto encoded_url = EncodeSpaces(request_info->url);
curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
curl_easy_setopt(*curl, CURLOPT_POST, 1L);

// Set POST body
Expand Down
44 changes: 44 additions & 0 deletions test/sql/httpfs/curl_space_encoding.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# name: test/sql/httpfs/curl_space_encoding.test
# description: This is part of url_encode.test to test space encoding with curl
# group: [httpfs]

require parquet

require httpfs

require-env S3_TEST_SERVER_AVAILABLE 1

# Require that these environment variables are also set

require-env AWS_DEFAULT_REGION

require-env AWS_ACCESS_KEY_ID

require-env AWS_SECRET_ACCESS_KEY

require-env DUCKDB_S3_ENDPOINT

require-env DUCKDB_S3_USE_SSL

# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
set ignore_error_messages

statement ok
CREATE TABLE test_1 as (SELECT 1 FROM range(0,5));

statement ok
SET httpfs_client_implementation = curl

foreach prefix s3:// r2:// s3a:// s3n://

statement ok
COPY test_1 TO '${prefix}test-bucket-public/url_encode/just because you can doesnt mean you should.parquet' (FORMAT 'parquet');

# For HTTP URLs, we also allow regular spaces, which will get encoded to %20 by DuckDB
query I
SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just because you can doesnt mean you should.parquet" LIMIT 1;
----
1


endloop
Loading