
Commit dba58cb

Merge pull request #417 from Tmonster/add_snowflake_gcs_test
Add support for Polaris with GCS backed storage
2 parents ad2ceeb + ffdaffa commit dba58cb

4 files changed: +110 -14 lines

.github/workflows/CloudTesting.yml

Lines changed: 4 additions & 0 deletions

@@ -65,9 +65,13 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{secrets.S3_ICEBERG_TEST_USER_KEY_ID}}
           AWS_SECRET_ACCESS_KEY: ${{secrets.S3_ICEBERG_TEST_USER_SECRET}}
           AWS_DEFAULT_REGION: ${{secrets.S3_ICEBERG_TEST_USER_REGION}}
+          SNOWFLAKE_KEY_ID_GCS: ${{secrets.SNOWFLAKE_KEY_ID_GCS}}
+          SNOWFLAKE_SECRET_KEY_GCS: ${{secrets.SNOWFLAKE_SECRET_KEY_GCS}}
+          SNOWFLAKE_CATALOG_URI_GCS: ${{secrets.SNOWFLAKE_CATALOG_URI}}
           R2_TOKEN: ${{secrets.r2_token}}
           ICEBERG_REMOTE_INSERT_READY: 1
           ICEBERG_AWS_REMOTE_AVAILABLE: 1
+          ICEBERG_SNOWFLAKE_REMOTE_AVAILABLE: 1
         run: |
           make test_release


extension_config.cmake

Lines changed: 3 additions & 2 deletions

@@ -33,7 +33,8 @@ endif ()
 endif()

 duckdb_extension_load(httpfs
-    GIT_URL https://github.com/duckdb/duckdb-httpfs
-    GIT_TAG da2821906eb42f7255d969be3e073bc1b45a71a8
+    GIT_URL https://github.com/duckdb/duckdb-httpfs
+    GIT_TAG e9bb99189d93c8ce6e0755907c38d283c963ae61
     INCLUDE_DIR extension/httpfs/include
 )
+

src/storage/iceberg_table_information.cpp

Lines changed: 47 additions & 12 deletions

@@ -15,8 +15,30 @@ const string &IcebergTableInformation::BaseFilePath() const {
 	return load_table_result.metadata.location;
 }

-static void ParseConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options) {
-	//! Set of recognized config parameters and the duckdb secret option that matches it.
+static string DetectStorageType(const string &location) {
+	// Detect storage type from the location URL
+	if (StringUtil::StartsWith(location, "gs://") || StringUtil::Contains(location, "storage.googleapis.com")) {
+		return "gcs";
+	} else if (StringUtil::StartsWith(location, "s3://") || StringUtil::StartsWith(location, "s3a://")) {
+		return "s3";
+	} else if (StringUtil::StartsWith(location, "abfs://") || StringUtil::StartsWith(location, "az://")) {
+		return "azure";
+	}
+	// Default to s3 for backward compatibility
+	return "s3";
+}
+
+static void ParseGCSConfigOptions(const case_insensitive_map_t<string> &config,
+                                  case_insensitive_map_t<Value> &options) {
+	// Parse GCS-specific configuration.
+	auto token_it = config.find("gcs.oauth2.token");
+	if (token_it != config.end()) {
+		options["bearer_token"] = token_it->second;
+	}
+}
+
+static void ParseS3ConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options) {
+	// Set of recognized S3 config parameters and the duckdb secret option that matches it.
 	static const case_insensitive_map_t<string> config_to_option = {{"s3.access-key-id", "key_id"},
 	                                                                {"s3.secret-access-key", "secret"},
 	                                                                {"s3.session-token", "session_token"},
@@ -25,15 +47,27 @@ static void ParseConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options) {
 	                                                                {"client.region", "region"},
 	                                                                {"s3.endpoint", "endpoint"}};

-	if (config.empty()) {
-		return;
-	}
 	for (auto &entry : config) {
 		auto it = config_to_option.find(entry.first);
 		if (it != config_to_option.end()) {
 			options[it->second] = entry.second;
 		}
 	}
+}
+
+static void ParseConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options,
+                               const string &storage_type = "s3") {
+	if (config.empty()) {
+		return;
+	}
+
+	// Parse storage-specific config options
+	if (storage_type == "gcs") {
+		ParseGCSConfigOptions(config, options);
+	} else {
+		// Default to S3 parsing for backward compatibility
+		ParseS3ConfigOptions(config, options);
+	}

 	auto it = config.find("s3.path-style-access");
 	if (it != config.end()) {
@@ -105,19 +139,20 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientContext
 		}
 	}

-	// Mapping from config key to a duckdb secret option
+	// Detect storage type from metadata location
+	const auto &metadata_location = load_table_result.metadata.location;
+	string storage_type = DetectStorageType(metadata_location);

+	// Mapping from config key to a duckdb secret option
 	case_insensitive_map_t<Value> config_options;
 	//! TODO: apply the 'defaults' retrieved from the /v1/config endpoint
 	config_options.insert(user_defaults.begin(), user_defaults.end());

 	if (load_table_result.has_config) {
 		auto &config = load_table_result.config;
-		ParseConfigOptions(config, config_options);
+		ParseConfigOptions(config, config_options, storage_type);
 	}

-	const auto &metadata_location = load_table_result.metadata.location;
-
 	if (load_table_result.has_storage_credentials) {
 		auto &storage_credentials = load_table_result.storage_credentials;

@@ -133,12 +168,12 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientContext
 		create_secret_input.scope.push_back(ignore_credential_prefix ? metadata_location : credential.prefix);
 		create_secret_input.name = StringUtil::Format("%s_%d_%s", secret_base_name, index, credential.prefix);

-		create_secret_input.type = "s3";
+		create_secret_input.type = storage_type;
 		create_secret_input.provider = "config";
 		create_secret_input.storage_type = "memory";
 		create_secret_input.options = config_options;

-		ParseConfigOptions(credential.config, create_secret_input.options);
+		ParseConfigOptions(credential.config, create_secret_input.options, storage_type);
 		//! TODO: apply the 'overrides' retrieved from the /v1/config endpoint
 		result.storage_credentials.push_back(create_secret_input);
 	}
@@ -154,7 +189,7 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientContext
 	//! TODO: apply the 'overrides' retrieved from the /v1/config endpoint
 	config.options = config_options;
 	config.name = secret_base_name;
-	config.type = "s3";
+	config.type = storage_type;
 	config.provider = "config";
 	config.storage_type = "memory";
 }
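
For orientation, here is a minimal standalone sketch of the dispatch this change introduces: the URI scheme of the table's metadata location selects the duckdb secret type, and GCS configs forward only gcs.oauth2.token as the bearer_token secret option. This is not the extension's actual build unit; StartsWith and Contains below are stand-ins for duckdb's StringUtil helpers, and plain std::map replaces the case-insensitive map types.

#include <iostream>
#include <map>
#include <string>

// Stand-ins for duckdb's StringUtil::StartsWith / StringUtil::Contains.
static bool StartsWith(const std::string &str, const std::string &prefix) {
	return str.rfind(prefix, 0) == 0;
}

static bool Contains(const std::string &str, const std::string &needle) {
	return str.find(needle) != std::string::npos;
}

// Same decision tree as DetectStorageType in the diff: the metadata
// location's URI scheme picks the secret type.
static std::string DetectStorageType(const std::string &location) {
	if (StartsWith(location, "gs://") || Contains(location, "storage.googleapis.com")) {
		return "gcs";
	} else if (StartsWith(location, "s3://") || StartsWith(location, "s3a://")) {
		return "s3";
	} else if (StartsWith(location, "abfs://") || StartsWith(location, "az://")) {
		return "azure";
	}
	return "s3"; // default, for backward compatibility
}

// Mirrors the GCS branch of ParseConfigOptions: only the OAuth2 token is
// forwarded, under the secret option name "bearer_token".
static std::map<std::string, std::string> ParseGCSConfig(const std::map<std::string, std::string> &config) {
	std::map<std::string, std::string> options;
	auto it = config.find("gcs.oauth2.token");
	if (it != config.end()) {
		options["bearer_token"] = it->second;
	}
	return options;
}

int main() {
	for (const std::string loc :
	     {"gs://bucket/tbl", "s3://bucket/tbl", "abfs://fs/tbl", "https://host/tbl"}) {
		std::cout << loc << " -> " << DetectStorageType(loc) << "\n";
	}
	auto opts = ParseGCSConfig({{"gcs.oauth2.token", "ya29.example-token"}});
	std::cout << "bearer_token = " << opts["bearer_token"] << "\n";
	return 0;
}

Note that unrecognized schemes fall back to "s3", so catalogs that vend S3 locations keep their previous behavior.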
test/sql/cloud/snowflake/test_snowflake.test

Lines changed: 56 additions & 0 deletions

@@ -0,0 +1,56 @@
+# name: test/sql/cloud/snowflake/test_snowflake.test
+# description: test integration with iceberg catalog read
+# group: [snowflake]
+
+require-env ICEBERG_SNOWFLAKE_REMOTE_AVAILABLE
+
+require-env SNOWFLAKE_KEY_ID_GCS
+
+require-env SNOWFLAKE_SECRET_KEY_GCS
+
+require-env SNOWFLAKE_CATALOG_URI_GCS
+
+require avro
+
+require parquet
+
+require iceberg
+
+require httpfs
+
+require aws
+
+
+# Do not ignore 'HTTP' error messages!
+set ignore_error_messages
+
+
+statement ok
+create secret polaris_secret (
+    TYPE ICEBERG,
+    CLIENT_ID '${SNOWFLAKE_KEY_ID_GCS}',
+    CLIENT_SECRET '${SNOWFLAKE_SECRET_KEY_GCS}',
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+
+statement ok
+attach 'GCS_catalog' as my_datalake (
+    type ICEBERG,
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+
+query I
+select * from my_datalake.default.duckdb_created_table;
+----
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
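
The ${VAR} placeholders in the test are filled from the environment variables exported in CloudTesting.yml, and the require-env directives skip the test when any of them is unset. As a rough illustration of that substitution step (this is a sketch of the idea, not duckdb's sqllogictest implementation):

#include <cstdlib>
#include <iostream>
#include <string>

// Replace each ${NAME} occurrence with the value of the NAME environment
// variable; unset variables leave the placeholder untouched.
static std::string ExpandEnvPlaceholders(const std::string &input) {
	std::string result;
	size_t pos = 0;
	while (pos < input.size()) {
		auto start = input.find("${", pos);
		if (start == std::string::npos) {
			result += input.substr(pos);
			break;
		}
		auto end = input.find('}', start + 2);
		if (end == std::string::npos) {
			result += input.substr(pos);
			break;
		}
		result += input.substr(pos, start - pos);
		const char *value = std::getenv(input.substr(start + 2, end - start - 2).c_str());
		result += value ? std::string(value) : input.substr(start, end - start + 1);
		pos = end + 1;
	}
	return result;
}

int main() {
	// With SNOWFLAKE_CATALOG_URI_GCS set in the environment, the ENDPOINT
	// clause below resolves to the real catalog URI before execution.
	std::cout << ExpandEnvPlaceholders("ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'") << "\n";
	return 0;
}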
