From e88935abb9c90f0daf3ed8f2d6b17c258d05583d Mon Sep 17 00:00:00 2001
From: Carlo Piovesan
Date: Wed, 1 Oct 2025 14:22:08 +0200
Subject: [PATCH] Add allow_asterisks_in_http_paths setting, which defaults to
 false, so that globs on the HTTP file system now throw

---
 src/httpfs_extension.cpp      |  2 ++
 src/include/httpfs.hpp        | 11 +++++++++++
 test/sql/httpfs/globbing.test | 28 ++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+)
 create mode 100644 test/sql/httpfs/globbing.test

diff --git a/src/httpfs_extension.cpp b/src/httpfs_extension.cpp
index b61e5154..4354d5d7 100644
--- a/src/httpfs_extension.cpp
+++ b/src/httpfs_extension.cpp
@@ -62,6 +62,8 @@ static void LoadInternal(ExtensionLoader &loader) {
 	    "http_keep_alive",
 	    "Keep alive connections. Setting this to false can help when running into connection failures",
 	    LogicalType::BOOLEAN, Value(true));
+	config.AddExtensionOption("allow_asterisks_in_http_paths", "Allow '*' character in URLs users can query",
+	                          LogicalType::BOOLEAN, Value(false));
 	config.AddExtensionOption("enable_curl_server_cert_verification",
 	                          "Enable server side certificate verification for CURL backend.",
 	                          LogicalType::BOOLEAN, Value(true));
diff --git a/src/include/httpfs.hpp b/src/include/httpfs.hpp
index c6baf131..907574d0 100644
--- a/src/include/httpfs.hpp
+++ b/src/include/httpfs.hpp
@@ -139,6 +139,17 @@ class HTTPFileSystem : public FileSystem {
 	static bool TryParseLastModifiedTime(const string &timestamp, timestamp_t &result);
 
 	vector<OpenFileInfo> Glob(const string &path, FileOpener *opener = nullptr) override {
+		// No expansion happens here: the path is returned verbatim below. A path containing '*' is therefore
+		// rejected unless the allow_asterisks_in_http_paths setting is true. The check is skipped (and the path
+		// passed through) when no opener is given or the setting cannot be looked up.
+		if (path.find('*') != std::string::npos && opener) {
+			Value setting_val;
+			if (FileOpener::TryGetCurrentSetting(opener, "allow_asterisks_in_http_paths", setting_val) &&
+			    !setting_val.GetValue<bool>()) {
+				throw InvalidInputException("Globs (`*`) for generic HTTP files are not supported.\nConsider `SET "
+				                            "allow_asterisks_in_http_paths = true;` to allow this behaviour");
+			}
+		}
 		return {path}; // FIXME
 	}
 
diff --git a/test/sql/httpfs/globbing.test b/test/sql/httpfs/globbing.test
new file mode 100644
index 00000000..7ffe6f0b
--- /dev/null
+++ b/test/sql/httpfs/globbing.test
@@ -0,0 +1,28 @@
+# name: test/sql/httpfs/globbing.test
+# description: Ensure '*' in generic HTTP(S) paths is rejected unless allow_asterisks_in_http_paths is true
+# group: [httpfs]
+
+require parquet
+
+require httpfs
+
+statement error
+select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
+----
+Invalid Input Error: Globs (`*`) for generic HTTP files are not supported.
+
+statement ok
+SET allow_asterisks_in_http_paths = true;
+
+statement error
+select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
+----
+HTTP Error: Unable to connect to URL
+
+statement ok
+SET allow_asterisks_in_http_paths = false;
+
+statement error
+select parse_path(filename), size, part, date from read_parquet('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/hive-partitioning/simple/*/*/test.parquet') order by filename;
+----
+Invalid Input Error: Globs (`*`) for generic HTTP files are not supported.