Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/httpfs_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ static void LoadInternal(ExtensionLoader &loader) {
};
config.AddExtensionOption("httpfs_client_implementation", "Select which is the HTTPUtil implementation to be used",
LogicalType::VARCHAR, "default", callback_httpfs_client_implementation);
config.AddExtensionOption("disable_global_s3_configuration",
"Automatically fetch AWS credentials from environment variables.", LogicalType::BOOLEAN,
Value::BOOLEAN(true));

if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") {
// Already handled, do not override
Expand Down
34 changes: 31 additions & 3 deletions src/include/s3fs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,33 @@

namespace duckdb {

//! Wrapper around KeyValueSecretReader that can ignore globally scoped settings.
//! Whether globally scoped values (e.g. derived from environment variables) are
//! honoured is controlled by the 'disable_global_s3_configuration' extension
//! option, which is read once at construction time.
class S3KeyValueReader {
public:
	S3KeyValueReader(FileOpener &opener_p, optional_ptr<FileOpenerInfo> info, const char **secret_types,
	                 idx_t secret_types_len);

	//! Look up 'secret_key' in the matched secret, falling back to the setting 'setting_name'.
	//! 'result' is only written when a non-NULL value was found AND the value did not come
	//! from the GLOBAL scope while global settings are disabled for this reader.
	template <class TYPE>
	SettingLookupResult TryGetSecretKeyOrSetting(const string &secret_key, const string &setting_name, TYPE &result) {
		Value temp_result;
		auto setting_scope = reader.TryGetSecretKeyOrSetting(secret_key, setting_name, temp_result);
		if (!temp_result.IsNull() &&
		    !(setting_scope.GetScope() == SettingScope::GLOBAL && !use_env_variables_for_secret_settings)) {
			result = temp_result.GetValue<TYPE>();
		}
		return setting_scope;
	}

	//! Look up 'secret_key' in the matched secret only (no setting fallback).
	template <class TYPE>
	SettingLookupResult TryGetSecretKey(const string &secret_key, TYPE &value_out) {
		// TryGetSecretKey never returns anything from global scope, so we don't need to check
		return reader.TryGetSecretKey(secret_key, value_out);
	}

private:
	//! Whether globally scoped settings may be applied to lookups. Default-initialized to
	//! match the registered default of 'disable_global_s3_configuration' so the member is
	//! never left indeterminate before the constructor body assigns it.
	bool use_env_variables_for_secret_settings = true;
	KeyValueSecretReader reader;
};

struct S3AuthParams {
string region;
string access_key_id;
Expand All @@ -34,7 +61,7 @@ struct S3AuthParams {
string oauth2_bearer_token; // OAuth2 bearer token for GCS

static S3AuthParams ReadFrom(optional_ptr<FileOpener> opener, FileOpenerInfo &info);
static S3AuthParams ReadFrom(KeyValueSecretReader& secret_reader, const std::string& file_path);
static S3AuthParams ReadFrom(S3KeyValueReader &secret_reader, const std::string &file_path);
};

struct AWSEnvironmentCredentialsProvider {
Expand Down Expand Up @@ -261,6 +288,7 @@ struct AWSListObjectV2 {
};

HTTPHeaders CreateS3Header(string url, string query, string host, string service, string method,
const S3AuthParams &auth_params, string date_now = "", string datetime_now = "",
string payload_hash = "", string content_type = "");
const S3AuthParams &auth_params, string date_now = "", string datetime_now = "",
string payload_hash = "", string content_type = "");

} // namespace duckdb
35 changes: 20 additions & 15 deletions src/s3fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
namespace duckdb {

HTTPHeaders CreateS3Header(string url, string query, string host, string service, string method,
const S3AuthParams &auth_params, string date_now, string datetime_now, string payload_hash,
string content_type) {
const S3AuthParams &auth_params, string date_now, string datetime_now, string payload_hash,
string content_type) {

HTTPHeaders res;
res["Host"] = host;
Expand Down Expand Up @@ -191,12 +191,12 @@ S3AuthParams S3AuthParams::ReadFrom(optional_ptr<FileOpener> opener, FileOpenerI
}

const char *secret_types[] = {"s3", "r2", "gcs", "aws"};
KeyValueSecretReader secret_reader(*opener, info, secret_types, 3);
S3KeyValueReader secret_reader(*opener, info, secret_types, 3);

return ReadFrom(secret_reader, info.file_path);
}

S3AuthParams S3AuthParams::ReadFrom(KeyValueSecretReader &secret_reader, const std::string &file_path) {
S3AuthParams S3AuthParams::ReadFrom(S3KeyValueReader &secret_reader, const std::string &file_path) {
auto result = S3AuthParams();

// These settings we just set or leave to their S3AuthParams default value
Expand All @@ -210,7 +210,6 @@ S3AuthParams S3AuthParams::ReadFrom(KeyValueSecretReader &secret_reader, const s
secret_reader.TryGetSecretKeyOrSetting("s3_url_compatibility_mode", "s3_url_compatibility_mode",
result.s3_url_compatibility_mode);
secret_reader.TryGetSecretKeyOrSetting("requester_pays", "s3_requester_pays", result.requester_pays);

// Endpoint and url style are slightly more complex and require special handling for gcs and r2
auto endpoint_result = secret_reader.TryGetSecretKeyOrSetting("endpoint", "s3_endpoint", result.endpoint);
auto url_style_result = secret_reader.TryGetSecretKeyOrSetting("url_style", "s3_url_style", result.url_style);
Expand Down Expand Up @@ -754,8 +753,8 @@ unique_ptr<HTTPResponse> S3FileSystem::PostRequest(FileHandle &handle, string ur
} else {
// Use existing S3 authentication
auto payload_hash = GetPayloadHash(buffer_in, buffer_in_len);
headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "POST", auth_params, "",
"", payload_hash, "application/octet-stream");
headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "POST", auth_params, "", "",
payload_hash, "application/octet-stream");
}

return HTTPFileSystem::PostRequest(handle, http_url, headers, result, buffer_in, buffer_in_len);
Expand All @@ -777,8 +776,8 @@ unique_ptr<HTTPResponse> S3FileSystem::PutRequest(FileHandle &handle, string url
} else {
// Use existing S3 authentication
auto payload_hash = GetPayloadHash(buffer_in, buffer_in_len);
headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "PUT", auth_params, "",
"", payload_hash, content_type);
headers = CreateS3Header(parsed_s3_url.path, http_params, parsed_s3_url.host, "s3", "PUT", auth_params, "", "",
payload_hash, content_type);
}

return HTTPFileSystem::PutRequest(handle, http_url, headers, buffer_in, buffer_in_len);
Expand All @@ -796,8 +795,7 @@ unique_ptr<HTTPResponse> S3FileSystem::HeadRequest(FileHandle &handle, string s3
headers["Host"] = parsed_s3_url.host;
} else {
// Use existing S3 authentication
headers =
CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "HEAD", auth_params, "", "", "", "");
headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "HEAD", auth_params, "", "", "", "");
}

return HTTPFileSystem::HeadRequest(handle, http_url, headers);
Expand All @@ -815,8 +813,7 @@ unique_ptr<HTTPResponse> S3FileSystem::GetRequest(FileHandle &handle, string s3_
headers["Host"] = parsed_s3_url.host;
} else {
// Use existing S3 authentication
headers =
CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
}

return HTTPFileSystem::GetRequest(handle, http_url, headers);
Expand All @@ -835,8 +832,7 @@ unique_ptr<HTTPResponse> S3FileSystem::GetRangeRequest(FileHandle &handle, strin
headers["Host"] = parsed_s3_url.host;
} else {
// Use existing S3 authentication
headers =
CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
headers = CreateS3Header(parsed_s3_url.path, "", parsed_s3_url.host, "s3", "GET", auth_params, "", "", "", "");
}

return HTTPFileSystem::GetRangeRequest(handle, http_url, headers, file_offset, buffer_out, buffer_out_len);
Expand Down Expand Up @@ -1382,4 +1378,13 @@ vector<string> AWSListObjectV2::ParseCommonPrefix(string &aws_response) {
return s3_prefixes;
}

//! Construct a reader over the given secret types and cache whether globally scoped
//! settings should be honoured, as controlled by the 'disable_global_s3_configuration'
//! extension option.
S3KeyValueReader::S3KeyValueReader(FileOpener &opener_p, optional_ptr<FileOpenerInfo> info, const char **secret_types,
                                   idx_t secret_types_len)
    : reader(opener_p, info, secret_types, secret_types_len) {
	Value use_env_vars_for_secret_info_setting;
	reader.TryGetSecretKeyOrSetting("disable_global_s3_configuration", "disable_global_s3_configuration",
	                                use_env_vars_for_secret_info_setting);
	// Guard against a NULL lookup result: GetValue<bool>() on a NULL Value is not safe.
	// Fall back to the option's registered default (true) when nothing was resolved.
	use_env_variables_for_secret_settings =
	    use_env_vars_for_secret_info_setting.IsNull() ? true : use_env_vars_for_secret_info_setting.GetValue<bool>();
}

} // namespace duckdb
40 changes: 40 additions & 0 deletions test/sql/test_read_public_bucket.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# name: test/sql/test_read_public_bucket.test
# description: verify that 'disable_global_s3_configuration' controls whether globally
#              scoped (environment-derived) S3 credentials are applied to a request
# group: [sql]

require parquet

require httpfs

# should only run in CI when the test server is available:
# there, AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY hold deliberately invalid credentials,
# which lets us detect whether the env-derived (global) credentials were applied
require-env S3_TEST_SERVER_AVAILABLE 1

require-env AWS_ACCESS_KEY_ID

require-env AWS_SECRET_ACCESS_KEY

# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
set ignore_error_messages

statement ok
set s3_region='us-east-2';

# set endpoint to the correct default, otherwise it will pick up the env variable
statement ok
set s3_endpoint='s3.amazonaws.com';

# see duckdb-internal/issues/6620
# with the default setting, the invalid env-var credentials (global scope) are applied
# to the request, so reading the public bucket fails with an authentication error
statement error
SELECT * FROM read_parquet('s3://coiled-datasets/timeseries/20-years/parquet/part.0.parquet') LIMIT 5;
----
<REGEX>:.*HTTP Error:.*403.*Authentication Failure.*

# setting the option to false makes globally scoped settings (the invalid env-var
# credentials) be ignored, so the anonymous read of the public bucket succeeds
statement ok
set disable_global_s3_configuration=false;

statement ok
SELECT * FROM read_parquet('s3://coiled-datasets/timeseries/20-years/parquet/part.0.parquet') LIMIT 5;
Loading