diff --git a/src/httpfs_extension.cpp b/src/httpfs_extension.cpp
index b61e515..7ce1ac1 100644
--- a/src/httpfs_extension.cpp
+++ b/src/httpfs_extension.cpp
@@ -128,6 +128,9 @@ static void LoadInternal(ExtensionLoader &loader) {
 	};
 	config.AddExtensionOption("httpfs_client_implementation", "Select which is the HTTPUtil implementation to be used",
 	                          LogicalType::VARCHAR, "default", callback_httpfs_client_implementation);
+	config.AddExtensionOption("disable_global_s3_configuration",
+	                          "When set to false, globally scoped S3 settings (e.g. derived from environment "
+	                          "variables) are ignored when building S3 authentication.", LogicalType::BOOLEAN,
+	                          Value::BOOLEAN(true));
 	if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") {
 		// Already handled, do not override
diff --git a/src/include/s3fs.hpp b/src/include/s3fs.hpp
index 2120da4..36e5579 100644
--- a/src/include/s3fs.hpp
+++ b/src/include/s3fs.hpp
@@ -20,6 +20,33 @@
 
 namespace duckdb {
 
+class S3KeyValueReader {
+public:
+	S3KeyValueReader(FileOpener &opener_p, optional_ptr<FileOpenerInfo> info, const char **secret_types,
+	                 idx_t secret_types_len);
+
+	// Like KeyValueSecretReader::TryGetSecretKeyOrSetting, but drops GLOBAL-scoped values when configured to.
+	template <class TYPE>
+	SettingLookupResult TryGetSecretKeyOrSetting(const string &secret_key, const string &setting_name, TYPE &result) {
+		Value temp_result;
+		auto setting_scope = reader.TryGetSecretKeyOrSetting(secret_key, setting_name, temp_result);
+		if (!temp_result.IsNull() &&
+		    !(setting_scope.GetScope() == SettingScope::GLOBAL && !use_env_variables_for_secret_settings)) {
+			result = temp_result.GetValue<TYPE>();
+		}
+		return setting_scope;
+	}
+
+	template <class TYPE>
+	SettingLookupResult TryGetSecretKey(const string &secret_key, TYPE &value_out) {
+		// TryGetSecretKey never returns anything from global scope, so we don't need to check
+		return reader.TryGetSecretKey(secret_key, value_out);
+	}
+
+private:
+	bool use_env_variables_for_secret_settings;
+	KeyValueSecretReader reader;
+};
+
 struct S3AuthParams {
 	string region;
 	string access_key_id;
@@ -34,7 +61,7 @@ struct S3AuthParams {
 	string oauth2_bearer_token; // OAuth2 bearer token for GCS
 
 	static S3AuthParams
ReadFrom(optional_ptr<FileOpener> opener, FileOpenerInfo &info);
-	static S3AuthParams ReadFrom(KeyValueSecretReader& secret_reader, const std::string& file_path);
+	static S3AuthParams ReadFrom(S3KeyValueReader &secret_reader, const std::string &file_path);
 };
 
 struct AWSEnvironmentCredentialsProvider {
@@ -261,6 +288,7 @@ struct AWSListObjectV2 {
 };
 
 HTTPHeaders CreateS3Header(string url, string query, string host, string service, string method,
-                           const S3AuthParams &auth_params, string date_now = "", string datetime_now = "",
-                           string payload_hash = "", string content_type = "");
+                           const S3AuthParams &auth_params, string date_now = "", string datetime_now = "",
+                           string payload_hash = "", string content_type = "");
+
 } // namespace duckdb
diff --git a/src/s3fs.cpp b/src/s3fs.cpp
index 1e8a099..88ef1c4 100644
--- a/src/s3fs.cpp
+++ b/src/s3fs.cpp
@@ -29,8 +29,8 @@
 namespace duckdb {
 
 HTTPHeaders CreateS3Header(string url, string query, string host, string service, string method,
-                           const S3AuthParams &auth_params, string date_now, string datetime_now, string payload_hash,
-                           string content_type) {
+                           const S3AuthParams &auth_params, string date_now, string datetime_now, string payload_hash,
+                           string content_type) {
 
 	HTTPHeaders res;
 	res["Host"] = host;
@@ -191,12 +191,12 @@ S3AuthParams S3AuthParams::ReadFrom(optional_ptr<FileOpener> opener, FileOpenerI
 	}
 
 	const char *secret_types[] = {"s3", "r2", "gcs", "aws"};
-	KeyValueSecretReader secret_reader(*opener, info, secret_types, 3);
+	S3KeyValueReader secret_reader(*opener, info, secret_types, 4); // 4 = all entries of secret_types, incl. "aws"
 
 	return ReadFrom(secret_reader, info.file_path);
 }
 
-S3AuthParams S3AuthParams::ReadFrom(KeyValueSecretReader &secret_reader, const std::string &file_path) {
+S3AuthParams S3AuthParams::ReadFrom(S3KeyValueReader &secret_reader, const std::string &file_path) {
 	auto result = S3AuthParams();
 
 	// These settings we just set or leave to their S3AuthParams default value
@@ -210,7 +210,6 @@ S3AuthParams S3AuthParams::ReadFrom(KeyValueSecretReader &secret_reader,
const std::string &file_path) {
 	secret_reader.TryGetSecretKeyOrSetting("s3_url_compatibility_mode", "s3_url_compatibility_mode",
 	                                       result.s3_url_compatibility_mode);
 	secret_reader.TryGetSecretKeyOrSetting("requester_pays", "s3_requester_pays", result.requester_pays);
-
 	// Endpoint and url style are slightly more complex and require special handling for gcs and r2
 	auto endpoint_result = secret_reader.TryGetSecretKeyOrSetting("endpoint", "s3_endpoint", result.endpoint);
 	auto url_style_result = secret_reader.TryGetSecretKeyOrSetting("url_style", "s3_url_style", result.url_style);
@@ -754,8 +753,8 @@ unique_ptr<HTTPResponse> S3FileSystem::PostRequest(FileHandle &handle, string ur
 	} else {
 		// Use existing S3 authentication
 		auto payload_hash = GetPayloadHash(buffer_in, buffer_in_len);
-		headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "POST", auth_params, "",
-		                         "", payload_hash, "application/octet-stream");
+		headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "POST", auth_params, "", "",
+		                         payload_hash, "application/octet-stream");
 	}
 
 	return HTTPFileSystem::PostRequest(handle, http_url, headers, result, buffer_in, buffer_in_len);
@@ -777,8 +776,8 @@ unique_ptr<HTTPResponse> S3FileSystem::PutRequest(FileHandle &handle, string url
 	} else {
 		// Use existing S3 authentication
 		auto payload_hash = GetPayloadHash(buffer_in, buffer_in_len);
-		headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "PUT", auth_params, "",
-		                         "", payload_hash, content_type);
+		headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "PUT", auth_params, "", "",
+		                         payload_hash, content_type);
 	}
 
 	return HTTPFileSystem::PutRequest(handle, http_url, headers, buffer_in, buffer_in_len);
@@ -796,8 +795,7 @@ unique_ptr<HTTPResponse> S3FileSystem::HeadRequest(FileHandle &handle, string s3
 		headers["Host"] = parsed_s3_url.host;
 	} else {
 		// Use existing S3 authentication
-		headers =
-		    CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "HEAD",
auth_params, "", "", "", "");
+		headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "HEAD", auth_params, "", "", "", "");
 	}
 
 	return HTTPFileSystem::HeadRequest(handle, http_url, headers);
@@ -815,8 +813,7 @@ unique_ptr<HTTPResponse> S3FileSystem::GetRequest(FileHandle &handle, string s3_
 		headers["Host"] = parsed_s3_url.host;
 	} else {
 		// Use existing S3 authentication
-		headers =
-		    CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
+		headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
 	}
 
 	return HTTPFileSystem::GetRequest(handle, http_url, headers);
@@ -835,8 +832,7 @@ unique_ptr<HTTPResponse> S3FileSystem::GetRangeRequest(FileHandle &handle, strin
 		headers["Host"] = parsed_s3_url.host;
 	} else {
 		// Use existing S3 authentication
-		headers =
-		    CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
+		headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
 	}
 
 	return HTTPFileSystem::GetRangeRequest(handle, http_url, headers, file_offset, buffer_out, buffer_out_len);
@@ -1382,4 +1378,13 @@ vector<string> AWSListObjectV2::ParseCommonPrefix(string &aws_response) {
 	return s3_prefixes;
 }
 
+S3KeyValueReader::S3KeyValueReader(FileOpener &opener_p, optional_ptr<FileOpenerInfo> info, const char **secret_types,
+                                   idx_t secret_types_len)
+    : reader(opener_p, info, secret_types, secret_types_len) {
+	Value use_env_vars_for_secret_info_setting;
+	reader.TryGetSecretKeyOrSetting("disable_global_s3_configuration", "disable_global_s3_configuration",
+	                                use_env_vars_for_secret_info_setting);
+	// NOTE(review): polarity looks inverted vs the option name — true means global/env settings ARE applied; confirm
+	use_env_variables_for_secret_settings = use_env_vars_for_secret_info_setting.GetValue<bool>();
+}
+
 } // namespace duckdb
diff --git a/test/sql/test_read_public_bucket.test b/test/sql/test_read_public_bucket.test
new file mode 100644
index 0000000..1c4c173
--- /dev/null
+++ b/test/sql/test_read_public_bucket.test
@@ -0,0 +1,41
@@
+# name: test/sql/test_read_public_bucket.test
+# description: test that disable_global_s3_configuration controls use of env-provided S3 credentials
+# group: [sql]
+
+require parquet
+
+require httpfs
+
+# should only run in CI when the test server is available.
+# then we have access to invalid AWS ACCESS KEYS and SECRET KEYS
+require-env S3_TEST_SERVER_AVAILABLE 1
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
+set ignore_error_messages
+
+statement ok
+set s3_region='us-east-2';
+
+# set endpoint to the correct default, otherwise it will pick up the env variable
+statement ok
+set s3_endpoint='s3.amazonaws.com';
+
+# see duckdb-internal/issues/6620
+# by default the (invalid) env vars for access_key_id and secret_key_id are used
+# which results in 403
+statement error
+SELECT * FROM read_parquet('s3://coiled-datasets/timeseries/20-years/parquet/part.0.parquet') LIMIT 5;
+----
+:.*HTTP Error:.*403.*Authentication Failure.*
+
+# false makes S3 auth ignore globally scoped settings, i.e. the invalid env credentials
+statement ok
+set disable_global_s3_configuration=false;
+
+statement ok
+SELECT * FROM read_parquet('s3://coiled-datasets/timeseries/20-years/parquet/part.0.parquet') LIMIT 5;