diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp
index d4e39bf5..5712b2e4 100644
--- a/src/httpfs_curl_client.cpp
+++ b/src/httpfs_curl_client.cpp
@@ -181,6 +181,20 @@ class HTTPFSCurlClient : public HTTPClient {
 		DestroyCurlGlobal();
 	}
 
+	// Percent-encodes literal spaces (' ' -> "%20"); every other character is copied through unchanged.
+	static string EncodeSpaces(const string &url) {
+		string out;
+		out.reserve(url.size());
+		for (const char c : url) {
+			if (c == ' ') {
+				out += "%20";
+			} else {
+				out += c;
+			}
+		}
+		return out;
+	}
+
 	unique_ptr Get(GetRequestInfo &info) override {
 		if (state) {
 			state->get_count++;
@@ -197,7 +211,8 @@ class HTTPFSCurlClient : public HTTPClient {
 		{
 			// If the same handle served a HEAD request, we must set NOBODY back to 0L to request content again
 			curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L);
-			curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
+			auto encoded_url = EncodeSpaces(request_info->url);
+			curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
 			curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr);
 			res = curl->Execute();
 		}
@@ -242,7 +257,8 @@ class HTTPFSCurlClient : public HTTPClient {
 
 		CURLcode res;
 		{
-			curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
+			auto encoded_url = EncodeSpaces(request_info->url);
+			curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
 			// Perform PUT
 			curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT");
 			// Include PUT body
@@ -276,7 +292,8 @@ class HTTPFSCurlClient : public HTTPClient {
 		CURLcode res;
 		{
 			// Set URL
-			curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
+			auto encoded_url = EncodeSpaces(request_info->url);
+			curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
 
 			// Perform HEAD request instead of GET
 			curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L);
@@ -309,7 +326,8 @@ class HTTPFSCurlClient : public HTTPClient {
 		CURLcode res;
 		{
 			// Set URL
-			curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
+			auto encoded_url = EncodeSpaces(request_info->url);
+			curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
 
 			// Set DELETE request method
 			curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE");
@@ -347,7 +365,8 @@ class HTTPFSCurlClient : public HTTPClient {
 
 		CURLcode res;
 		{
-			curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str());
+			auto encoded_url = EncodeSpaces(request_info->url);
+			curl_easy_setopt(*curl, CURLOPT_URL, encoded_url.c_str());
 			curl_easy_setopt(*curl, CURLOPT_POST, 1L);
 
 			// Set POST body
diff --git a/test/sql/httpfs/curl_space_encoding.test b/test/sql/httpfs/curl_space_encoding.test
new file mode 100644
index 00000000..128b491c
--- /dev/null
+++ b/test/sql/httpfs/curl_space_encoding.test
@@ -0,0 +1,44 @@
+# name: test/sql/httpfs/curl_space_encoding.test
+# description: This is part of url_encode.test to test space encoding with curl
+# group: [httpfs]
+
+require parquet
+
+require httpfs
+
+require-env S3_TEST_SERVER_AVAILABLE 1
+
+# Require that these environment variables are also set
+
+require-env AWS_DEFAULT_REGION
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+require-env DUCKDB_S3_ENDPOINT
+
+require-env DUCKDB_S3_USE_SSL
+
+# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
+set ignore_error_messages
+
+statement ok
+CREATE TABLE test_1 as (SELECT 1 FROM range(0,5));
+
+statement ok
+SET httpfs_client_implementation = curl
+
+foreach prefix s3:// r2:// s3a:// s3n://
+
+statement ok
+COPY test_1 TO '${prefix}test-bucket-public/url_encode/just because you can doesnt mean you should.parquet' (FORMAT 'parquet');
+
+# For HTTP urls, we also allow regular spaces, which will get encoded to %20 by duckdb
+query I
+SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just because you can doesnt mean you should.parquet" LIMIT 1;
+----
+1
+
+
+endloop