Commit fdf385c

Merge pull request #435 from Tmonster/v1.3-and-main
Update v1.3 with main code
2 parents 99abfcc + 98d4e44, commit fdf385c

10 files changed: +190 / -44 lines

.github/workflows/CloudTesting.yml
Lines changed: 8 additions & 2 deletions

@@ -58,21 +58,27 @@ jobs:
           AWS_DEFAULT_REGION: ${{secrets.S3_ICEBERG_TEST_USER_REGION}}
           R2_TOKEN: ${{secrets.r2_token}}
         run: |
-          python3 scripts/create_s3_insert_table.py --action=delete-and-create --catalogs=s3tables,r2
+          python3 scripts/create_s3_insert_table.py --action=delete-and-create --catalogs=s3tables,r2,glue
 
       - name: Test with rest catalog
         env:
           AWS_ACCESS_KEY_ID: ${{secrets.S3_ICEBERG_TEST_USER_KEY_ID}}
           AWS_SECRET_ACCESS_KEY: ${{secrets.S3_ICEBERG_TEST_USER_SECRET}}
           AWS_DEFAULT_REGION: ${{secrets.S3_ICEBERG_TEST_USER_REGION}}
+          SNOWFLAKE_KEY_ID_GCS: ${{secrets.SNOWFLAKE_KEY_ID_GCS}}
+          SNOWFLAKE_SECRET_KEY_GCS: ${{secrets.SNOWFLAKE_SECRET_KEY_GCS}}
+          SNOWFLAKE_KEY_ID_S3: ${{secrets.SNOWFLAKE_KEY_ID_S3}}
+          SNOWFLAKE_SECRET_KEY_S3: ${{secrets.SNOWFLAKE_SECRET_KEY_S3}}
+          SNOWFLAKE_CATALOG_URI_GCS: ${{secrets.SNOWFLAKE_CATALOG_URI_GCS}}
           R2_TOKEN: ${{secrets.r2_token}}
           ICEBERG_REMOTE_INSERT_READY: 1
           ICEBERG_AWS_REMOTE_AVAILABLE: 1
+          ICEBERG_SNOWFLAKE_REMOTE_AVAILABLE: 1
         run: |
           make test_release
 
       - name: File issue if error
-        if: failure()
+        if: ${{ contains(github.ref_name, 'main') && failure() }}
         env:
           GH_TOKEN: ${{ github.token }}
         run: |

duckdb (submodule)
Submodule duckdb updated 371 files

extension_config.cmake
Lines changed: 3 additions & 2 deletions

@@ -33,7 +33,8 @@ endif ()
 endif()
 
 duckdb_extension_load(httpfs
-    GIT_URL https://github.com/duckdb/duckdb-httpfs
-    GIT_TAG da2821906eb42f7255d969be3e073bc1b45a71a8
+    GIT_URL https://github.com/duckdb/duckdb-httpfs
+    GIT_TAG e9bb99189d93c8ce6e0755907c38d283c963ae61
     INCLUDE_DIR extension/httpfs/include
 )
+

src/storage/iceberg_table_information.cpp
Lines changed: 47 additions & 12 deletions

@@ -15,8 +15,30 @@ const string &IcebergTableInformation::BaseFilePath() const {
     return load_table_result.metadata.location;
 }
 
-static void ParseConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options) {
-    //! Set of recognized config parameters and the duckdb secret option that matches it.
+static string DetectStorageType(const string &location) {
+    // Detect storage type from the location URL
+    if (StringUtil::StartsWith(location, "gs://") || StringUtil::Contains(location, "storage.googleapis.com")) {
+        return "gcs";
+    } else if (StringUtil::StartsWith(location, "s3://") || StringUtil::StartsWith(location, "s3a://")) {
+        return "s3";
+    } else if (StringUtil::StartsWith(location, "abfs://") || StringUtil::StartsWith(location, "az://")) {
+        return "azure";
+    }
+    // Default to s3 for backward compatibility
+    return "s3";
+}
+
+static void ParseGCSConfigOptions(const case_insensitive_map_t<string> &config,
+                                  case_insensitive_map_t<Value> &options) {
+    // Parse GCS-specific configuration.
+    auto token_it = config.find("gcs.oauth2.token");
+    if (token_it != config.end()) {
+        options["bearer_token"] = token_it->second;
+    }
+}
+
+static void ParseS3ConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options) {
+    // Set of recognized S3 config parameters and the duckdb secret option that matches it.
     static const case_insensitive_map_t<string> config_to_option = {{"s3.access-key-id", "key_id"},
                                                                     {"s3.secret-access-key", "secret"},
                                                                     {"s3.session-token", "session_token"},
@@ -25,15 +47,27 @@ static void ParseConfigOptions(const case_insensitive_map_t<string> &config, cas
                                                                     {"client.region", "region"},
                                                                     {"s3.endpoint", "endpoint"}};
 
-    if (config.empty()) {
-        return;
-    }
     for (auto &entry : config) {
        auto it = config_to_option.find(entry.first);
        if (it != config_to_option.end()) {
            options[it->second] = entry.second;
        }
    }
+}
+
+static void ParseConfigOptions(const case_insensitive_map_t<string> &config, case_insensitive_map_t<Value> &options,
+                               const string &storage_type = "s3") {
+    if (config.empty()) {
+        return;
+    }
+
+    // Parse storage-specific config options
+    if (storage_type == "gcs") {
+        ParseGCSConfigOptions(config, options);
+    } else {
+        // Default to S3 parsing for backward compatibility
+        ParseS3ConfigOptions(config, options);
+    }
 
     auto it = config.find("s3.path-style-access");
     if (it != config.end()) {
@@ -105,19 +139,20 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientConte
         }
     }
 
-    // Mapping from config key to a duckdb secret option
+    // Detect storage type from metadata location
+    const auto &metadata_location = load_table_result.metadata.location;
+    string storage_type = DetectStorageType(metadata_location);
 
+    // Mapping from config key to a duckdb secret option
     case_insensitive_map_t<Value> config_options;
     //! TODO: apply the 'defaults' retrieved from the /v1/config endpoint
     config_options.insert(user_defaults.begin(), user_defaults.end());
 
     if (load_table_result.has_config) {
         auto &config = load_table_result.config;
-        ParseConfigOptions(config, config_options);
+        ParseConfigOptions(config, config_options, storage_type);
     }
 
-    const auto &metadata_location = load_table_result.metadata.location;
-
     if (load_table_result.has_storage_credentials) {
         auto &storage_credentials = load_table_result.storage_credentials;
 
@@ -133,12 +168,12 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientConte
             create_secret_input.scope.push_back(ignore_credential_prefix ? metadata_location : credential.prefix);
             create_secret_input.name = StringUtil::Format("%s_%d_%s", secret_base_name, index, credential.prefix);
 
-            create_secret_input.type = "s3";
+            create_secret_input.type = storage_type;
             create_secret_input.provider = "config";
             create_secret_input.storage_type = "memory";
             create_secret_input.options = config_options;
 
-            ParseConfigOptions(credential.config, create_secret_input.options);
+            ParseConfigOptions(credential.config, create_secret_input.options, storage_type);
             //! TODO: apply the 'overrides' retrieved from the /v1/config endpoint
             result.storage_credentials.push_back(create_secret_input);
         }
@@ -154,7 +189,7 @@ IRCAPITableCredentials IcebergTableInformation::GetVendedCredentials(ClientConte
         //! TODO: apply the 'overrides' retrieved from the /v1/config endpoint
         config.options = config_options;
         config.name = secret_base_name;
-        config.type = "s3";
+        config.type = storage_type;
         config.provider = "config";
         config.storage_type = "memory";
     }
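
For context, a minimal standalone sketch (not DuckDB source) of the routing this diff introduces: the metadata location's URL scheme picks a storage type, and config parsing dispatches to a GCS- or S3-specific key mapping. Plain std::string and std::map stand in for DuckDB's StringUtil and case_insensitive_map_t here, and the parameter order differs from the extension's actual ParseConfigOptions, so names and signatures are illustrative only.

// sketch.cpp -- illustrative only; mirrors the DetectStorageType / ParseConfigOptions
// split added in this commit, using standard-library types instead of DuckDB's.
#include <iostream>
#include <map>
#include <string>

static bool StartsWith(const std::string &s, const std::string &prefix) {
    return s.rfind(prefix, 0) == 0;
}

// Choose the secret type from the table's metadata location URL.
static std::string DetectStorageType(const std::string &location) {
    if (StartsWith(location, "gs://") || location.find("storage.googleapis.com") != std::string::npos) {
        return "gcs";
    }
    if (StartsWith(location, "s3://") || StartsWith(location, "s3a://")) {
        return "s3";
    }
    if (StartsWith(location, "abfs://") || StartsWith(location, "az://")) {
        return "azure";
    }
    return "s3"; // default, for backward compatibility
}

// Map catalog-provided config keys onto secret options for the detected storage type.
static void ParseConfigOptions(const std::map<std::string, std::string> &config, const std::string &storage_type,
                               std::map<std::string, std::string> &options) {
    if (config.empty()) {
        return;
    }
    if (storage_type == "gcs") {
        // GCS catalogs hand back an OAuth2 token; expose it as a bearer token option.
        auto it = config.find("gcs.oauth2.token");
        if (it != config.end()) {
            options["bearer_token"] = it->second;
        }
        return;
    }
    // S3-style keys are also the fallback path for unknown storage types.
    static const std::map<std::string, std::string> config_to_option = {{"s3.access-key-id", "key_id"},
                                                                        {"s3.secret-access-key", "secret"},
                                                                        {"s3.session-token", "session_token"},
                                                                        {"client.region", "region"},
                                                                        {"s3.endpoint", "endpoint"}};
    for (const auto &entry : config) {
        auto it = config_to_option.find(entry.first);
        if (it != config_to_option.end()) {
            options[it->second] = entry.second;
        }
    }
}

int main() {
    const std::string location = "gs://bucket/warehouse/db/tbl/metadata/v1.metadata.json";
    const std::string storage_type = DetectStorageType(location); // "gcs"
    std::map<std::string, std::string> options;
    ParseConfigOptions({{"gcs.oauth2.token", "<token>"}}, storage_type, options);
    std::cout << storage_type << " bearer_token=" << options["bearer_token"] << "\n";
    return 0;
}

The later hunks then thread the detected type into the secrets that are created (create_secret_input.type = storage_type and config.type = storage_type), so a GCS-backed table produces a GCS secret instead of an S3 one.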

test/sql/cloud/glue/test_create_table_glue.test
Lines changed: 0 additions & 4 deletions

@@ -18,10 +18,6 @@ require httpfs
 
 require aws
 
-# credentials in CI cannot set up the environment for this test
-# need to give the crentials glue:DropTable priviledges
-mode skip
-
 statement ok
 CREATE SECRET (
     TYPE S3,

test/sql/cloud/glue/test_direct_keys_glue.test
Lines changed: 0 additions & 9 deletions

@@ -18,11 +18,6 @@ require httpfs
 
 require aws
 
-# TODO: re-enable these tests once we know what account has these
-# credentials, and we can grant them access to the glue catalog
-# test using keys directory
-mode skip
-
 statement ok
 CREATE SECRET s1 (
     TYPE S3,
@@ -37,10 +32,6 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as my_datalake (
     ENDPOINT_TYPE 'GLUE'
 );
 
-query T nosort tables_1
-show all tables;
-----
-
 statement ok
 SELECT count(*) FROM my_datalake.myblognamespace.lineitem;
 
test/sql/cloud/glue/test_direct_keys_glue_no_endpoint_type.test
Lines changed: 0 additions & 10 deletions

@@ -18,10 +18,6 @@ require httpfs
 
 require aws
 
-# TODO: re-enable these tests once we know what account has these
-# credentials, and we can grant them access to the glue catalog
-mode skip
-
 # test using keys directory
 statement ok
 CREATE SECRET s1 (
@@ -38,10 +34,6 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as my_datalake (
     ENDPOINT 'glue.us-east-1.amazonaws.com/iceberg'
 );
 
-query T nosort tables_1
-show all tables;
-----
-
 statement ok
 SELECT count(*) FROM my_datalake.myblognamespace.lineitem;
 
@@ -51,8 +43,6 @@ drop secret s1;
 statement ok
 detach my_datalake;
 
-mode unskip
-
 # test using assume role
 statement ok
 CREATE SECRET assume_role_secret (

test/sql/cloud/glue/test_insert_glue.test
Lines changed: 0 additions & 4 deletions

@@ -20,10 +20,6 @@ require httpfs
 
 require aws
 
-# credentials in CI cannot set up the environment for this test
-# need to give the crentials glue:DropTable priviledges
-mode skip
-
 statement ok
 CREATE SECRET (
     TYPE S3,

test/sql/cloud/snowflake/test_snowflake.test (new file)
Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
+# name: test/sql/cloud/snowflake/test_snowflake.test
+# description: test integration with iceberg catalog read
+# group: [snowflake]
+
+require-env ICEBERG_SNOWFLAKE_REMOTE_AVAILABLE
+
+require-env SNOWFLAKE_KEY_ID_GCS
+
+require-env SNOWFLAKE_SECRET_KEY_GCS
+
+require-env SNOWFLAKE_CATALOG_URI_GCS
+
+require avro
+
+require parquet
+
+require iceberg
+
+require httpfs
+
+require aws
+
+
+# Do not ignore 'HTTP' error messages!
+set ignore_error_messages
+
+
+statement ok
+create secret polaris_secret (
+    TYPE ICEBERG,
+    CLIENT_ID '${SNOWFLAKE_KEY_ID_GCS}',
+    CLIENT_SECRET '${SNOWFLAKE_SECRET_KEY_GCS}',
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+
+statement ok
+attach 'GCS_catalog' as my_datalake (
+    type ICEBERG,
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+statement ok
+create schema if not exists my_datalake.test_create_schema;
+
+statement ok
+create table my_datalake.test_create_schema.table1 as select range a from range(10);
+
+query I
+select * from my_datalake.test_create_schema.table1;
+----
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+statement ok
+drop table if exists my_datalake.test_create_schema.table1;
+
+statement ok
+drop schema if exists my_datalake.test_create_schema;

test/sql/cloud/snowflake/test_snowflake_s3.test (new file)
Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+# name: test/sql/cloud/snowflake/test_snowflake_s3.test
+# group: [snowflake]
+
+require-env SNOWFLAKE_KEY_ID_S3
+
+require-env SNOWFLAKE_SECRET_ID_S3
+
+require-env SNOWFLAKE_CATALOG_URI_GCS
+
+require avro
+
+require parquet
+
+require iceberg
+
+require httpfs
+
+require aws
+
+# Do not ignore 'HTTP' error messages!
+set ignore_error_messages
+
+statement ok
+create secret polaris_secret (
+    TYPE ICEBERG,
+    CLIENT_ID '${SNOWFLAKE_KEY_ID_S3}',
+    CLIENT_SECRET '${SNOWFLAKE_SECRET_KEY_S3}',
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+
+statement ok
+attach 's3-catalog' as my_datalake (
+    type ICEBERG,
+    default_region 'eu-west-2',
+    ENDPOINT '${SNOWFLAKE_CATALOG_URI_GCS}'
+);
+
+
+statement ok
+create schema if not exists my_datalake.test_create_schema;
+
+statement ok
+create table my_datalake.test_create_schema.table1 as select range a from range(10);
+
+query I
+select * from my_datalake.test_create_schema.table1;
+----
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+statement ok
+drop table if exists my_datalake.test_create_schema.table1;
+
+statement ok
+drop schema if exists my_datalake.test_create_schema;
