From fe93f3c93c0729fde3c5a6b2775208ed62a48dad Mon Sep 17 00:00:00 2001 From: DinosL Date: Fri, 9 Jan 2026 15:54:03 +0100 Subject: [PATCH 1/4] curl: fix missing url --- src/s3fs.cpp | 3 +-- test/sql/curl_client/missing_url_issue.test | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test/sql/curl_client/missing_url_issue.test diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 1510bab7..a7b6a8d5 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -1236,13 +1236,12 @@ string AWSListObjectV2::Request(string &path, HTTPParams &http_params, S3AuthPar req_params += "&delimiter=%2F"; } - string listobjectv2_url = req_path + "?" + req_params; - auto header_map = CreateS3Header(req_path, req_params, parsed_url.host, "s3", "GET", s3_auth_params, "", "", "", ""); // Get requests use fresh connection string full_host = parsed_url.http_proto + parsed_url.host; + string listobjectv2_url = full_host + req_path + "?" + req_params; std::stringstream response; GetRequestInfo get_request( full_host, listobjectv2_url, header_map, http_params, diff --git a/test/sql/curl_client/missing_url_issue.test b/test/sql/curl_client/missing_url_issue.test new file mode 100644 index 00000000..541652af --- /dev/null +++ b/test/sql/curl_client/missing_url_issue.test @@ -0,0 +1,16 @@ +# name: test/sql/curl_client/missing_url_issue.test +# description: curl requires a full url to do a request, while httplib reconstructs it from components (hots, path, query) +# group: [httpfs] + +require httpfs + +require parquet + +statement ok +SET httpfs_client_implementation = curl; + +statement ok +SET s3_region='us-west-2'; + +statement ok +SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/2025-12-17.0/theme=places/type=place/*.parquet') LIMIT 10 \ No newline at end of file From b91491bdae4b8b060e9280c5461ecb1a949cd449 Mon Sep 17 00:00:00 2001 From: DinosL Date: Wed, 14 Jan 2026 15:55:46 +0100 Subject: [PATCH 2/4] add auth information to header --- 
src/httpfs_curl_client.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp index 18b81093..0f3c97ab 100644 --- a/src/httpfs_curl_client.cpp +++ b/src/httpfs_curl_client.cpp @@ -195,7 +195,7 @@ class HTTPFSCurlClient : public HTTPClient { state->get_count++; } - auto curl_headers = TransformHeadersCurl(info.headers); + auto curl_headers = TransformHeadersCurl(info.headers, info.params); request_info->url = info.url; if (!info.params.extra_headers.empty()) { auto curl_params = TransformParamsCurl(info.params); @@ -240,7 +240,7 @@ class HTTPFSCurlClient : public HTTPClient { state->total_bytes_sent += info.buffer_in_len; } - auto curl_headers = TransformHeadersCurl(info.headers); + auto curl_headers = TransformHeadersCurl(info.headers, info.params); // Add content type header from info curl_headers.Add("Content-Type: " + info.content_type); // transform parameters @@ -276,7 +276,7 @@ class HTTPFSCurlClient : public HTTPClient { state->head_count++; } - auto curl_headers = TransformHeadersCurl(info.headers); + auto curl_headers = TransformHeadersCurl(info.headers, info.params); request_info->url = info.url; // transform parameters if (!info.params.extra_headers.empty()) { @@ -310,7 +310,7 @@ class HTTPFSCurlClient : public HTTPClient { state->delete_count++; } - auto curl_headers = TransformHeadersCurl(info.headers); + auto curl_headers = TransformHeadersCurl(info.headers, info.params); // transform parameters request_info->url = info.url; if (!info.params.extra_headers.empty()) { @@ -348,7 +348,7 @@ class HTTPFSCurlClient : public HTTPClient { state->total_bytes_sent += info.buffer_in_len; } - auto curl_headers = TransformHeadersCurl(info.headers); + auto curl_headers = TransformHeadersCurl(info.headers, info.params); const string content_type = "Content-Type: application/octet-stream"; curl_headers.Add(content_type.c_str()); // transform parameters @@ -382,7 +382,9 @@ class 
HTTPFSCurlClient : public HTTPClient { } private: - CURLRequestHeaders TransformHeadersCurl(const HTTPHeaders &header_map) { + CURLRequestHeaders TransformHeadersCurl(const HTTPHeaders &header_map, const HTTPParams &params) { + auto &httpfs_params = params.Cast(); + std::vector headers; for (auto &entry : header_map) { const std::string new_header = entry.first + ": " + entry.second; @@ -392,6 +394,11 @@ class HTTPFSCurlClient : public HTTPClient { for (auto &header : headers) { curl_headers.Add(header); } + if (!httpfs_params.pre_merged_headers) { + for (auto &entry : params.extra_headers) { + curl_headers.Add(entry.first + ": " + entry.second); + } + } return curl_headers; } From ee22299797a53eb62eab464e0962b94624023004 Mon Sep 17 00:00:00 2001 From: DinosL Date: Wed, 14 Jan 2026 15:57:23 +0100 Subject: [PATCH 3/4] don't encode custom headers as query params --- src/httpfs_curl_client.cpp | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp index 0f3c97ab..28ad46ef 100644 --- a/src/httpfs_curl_client.cpp +++ b/src/httpfs_curl_client.cpp @@ -197,10 +197,6 @@ class HTTPFSCurlClient : public HTTPClient { auto curl_headers = TransformHeadersCurl(info.headers, info.params); request_info->url = info.url; - if (!info.params.extra_headers.empty()) { - auto curl_params = TransformParamsCurl(info.params); - request_info->url += "?" + curl_params; - } CURLcode res; { @@ -245,10 +241,6 @@ class HTTPFSCurlClient : public HTTPClient { curl_headers.Add("Content-Type: " + info.content_type); // transform parameters request_info->url = info.url; - if (!info.params.extra_headers.empty()) { - auto curl_params = TransformParamsCurl(info.params); - request_info->url += "?" 
+ curl_params; - } CURLcode res; { @@ -279,10 +271,6 @@ class HTTPFSCurlClient : public HTTPClient { auto curl_headers = TransformHeadersCurl(info.headers, info.params); request_info->url = info.url; // transform parameters - if (!info.params.extra_headers.empty()) { - auto curl_params = TransformParamsCurl(info.params); - request_info->url += "?" + curl_params; - } CURLcode res; { @@ -313,10 +301,6 @@ class HTTPFSCurlClient : public HTTPClient { auto curl_headers = TransformHeadersCurl(info.headers, info.params); // transform parameters request_info->url = info.url; - if (!info.params.extra_headers.empty()) { - auto curl_params = TransformParamsCurl(info.params); - request_info->url += "?" + curl_params; - } CURLcode res; { @@ -353,10 +337,6 @@ class HTTPFSCurlClient : public HTTPClient { curl_headers.Add(content_type.c_str()); // transform parameters request_info->url = info.url; - if (!info.params.extra_headers.empty()) { - auto curl_params = TransformParamsCurl(info.params); - request_info->url += "?" 
+ curl_params; - } CURLcode res; { @@ -402,22 +382,6 @@ class HTTPFSCurlClient : public HTTPClient { return curl_headers; } - string TransformParamsCurl(const HTTPParams &params) { - string result = ""; - unordered_map escaped_params; - bool first_param = true; - for (auto &entry : params.extra_headers) { - const string key = entry.first; - const string value = curl_easy_escape(*curl, entry.second.c_str(), 0); - if (!first_param) { - result += "&"; - } - result += key + "=" + value; - first_param = false; - } - return result; - } - void ResetRequestInfo() { // clear headers after transform request_info->header_collection.clear(); From a9b999099349131eeee97493ab31a4feefb1e03c Mon Sep 17 00:00:00 2001 From: DinosL Date: Wed, 14 Jan 2026 16:12:54 +0100 Subject: [PATCH 4/4] remove unnecessary test --- test/sql/curl_client/missing_url_issue.test | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 test/sql/curl_client/missing_url_issue.test diff --git a/test/sql/curl_client/missing_url_issue.test b/test/sql/curl_client/missing_url_issue.test deleted file mode 100644 index 541652af..00000000 --- a/test/sql/curl_client/missing_url_issue.test +++ /dev/null @@ -1,16 +0,0 @@ -# name: test/sql/curl_client/missing_url_issue.test -# description: curl requires a full url to do a request, while httplib reconstructs it from components (hots, path, query) -# group: [httpfs] - -require httpfs - -require parquet - -statement ok -SET httpfs_client_implementation = curl; - -statement ok -SET s3_region='us-west-2'; - -statement ok -SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/2025-12-17.0/theme=places/type=place/*.parquet') LIMIT 10 \ No newline at end of file