From cae995e5261fe75e9fe451cd888a6478ef27d680 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 23 Dec 2025 16:53:11 +0800 Subject: [PATCH 01/10] chore(query): allow_anonymous when key token is empty --- src/common/storage/src/operator.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 5f9834696fb8c..dcd1a3c19dc61 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -409,6 +409,14 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { // Root. .root(&cfg.root); + if cfg.access_key_id.is_empty() + && cfg.secret_access_key.is_empty() + && cfg.security_token.is_empty() + && cfg.role_arn.is_empty() + { + builder = builder.allow_anonymous(); + } + if cfg.storage_class != S3StorageClass::Standard { // Apply S3 storage class to the operator. // Note: Some S3-compatible storage systems (e.g., MinIO) may not support From 8795a812fee02984cc4832f6fbff659736385215 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 17:00:45 +0800 Subject: [PATCH 02/10] test(sqllogictest): add public S3 stage list regression --- .../base/05_ddl/05_0016_ddl_stage_public_s3_list.test | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test b/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test new file mode 100644 index 0000000000000..2051036fc7539 --- /dev/null +++ b/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test @@ -0,0 +1,11 @@ +statement ok +DROP STAGE IF EXISTS wizardbend_tpch + +statement ok +CREATE OR REPLACE STAGE wizardbend_tpch URL='s3://wizardbend/TPC-H/1TB/customer/' + +statement ok +LIST @wizardbend_tpch + +statement ok +DROP STAGE IF EXISTS wizardbend_tpch From 9377b5c3e142eaf5a79b889cd54867d8b854a7c2 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 17:47:53 +0800 Subject: [PATCH 03/10] chore(storage): gate anonymous S3 by credential loader --- src/common/storage/src/operator.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index dcd1a3c19dc61..80f6f834f8579 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -409,14 +409,6 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { // Root. .root(&cfg.root); - if cfg.access_key_id.is_empty() - && cfg.secret_access_key.is_empty() - && cfg.security_token.is_empty() - && cfg.role_arn.is_empty() - { - builder = builder.allow_anonymous(); - } - if cfg.storage_class != S3StorageClass::Standard { // Apply S3 storage class to the operator. // Note: Some S3-compatible storage systems (e.g., MinIO) may not support @@ -429,6 +421,21 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder = builder.disable_config_load().disable_ec2_metadata(); } + // Force anonymous (unsigned) requests only when credential loader is disabled and no explicit + // credentials are provided. This is mainly for external stages to read public buckets safely + // without accidentally using the tenant role from the environment (env/profile/IMDS/IRSA). + // + // Don't enable it when credential loader is allowed, otherwise it would bypass the default + // credential chain that internal storage configurations may rely on. + if cfg.disable_credential_loader + && cfg.access_key_id.is_empty() + && cfg.secret_access_key.is_empty() + && cfg.security_token.is_empty() + && cfg.role_arn.is_empty() + { + builder = builder.allow_anonymous(); + } + // Enable virtual host style if cfg.enable_virtual_host_style { builder = builder.enable_virtual_host_style(); From df005368104f1788794539f9e098c685675ab67e Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 18:01:45 +0800 Subject: [PATCH 04/10] chore(storage): clarify anonymous S3 comment --- src/common/storage/src/operator.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 80f6f834f8579..e7e8adcd3961b 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -421,12 +421,8 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder = builder.disable_config_load().disable_ec2_metadata(); } - // Force anonymous (unsigned) requests only when credential loader is disabled and no explicit - // credentials are provided. This is mainly for external stages to read public buckets safely - // without accidentally using the tenant role from the environment (env/profile/IMDS/IRSA). - // - // Don't enable it when credential loader is allowed, otherwise it would bypass the default - // credential chain that internal storage configurations may rely on. + // If credential loading is disabled and no credentials are provided, use unsigned requests. + // This allows accessing public buckets reliably in environments where signing could be rejected. if cfg.disable_credential_loader && cfg.access_key_id.is_empty() && cfg.secret_access_key.is_empty() From 56c6ad208abe836a05ef2d727449b6a71ab397f3 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 19:09:03 +0800 Subject: [PATCH 05/10] fix(storage): enable anonymous S3 access when no explicit credentials provided Remove the restrictive disable_credential_loader condition that prevented anonymous access to public S3 buckets in Databend Cloud environment. When users create an external stage pointing to a public bucket without providing credentials, OpenDAL should use unsigned requests. The previous condition required disable_credential_loader=true which is never set in cloud environments that rely on EC2/K8s metadata for credentials. OpenDAL will still prefer credentials from environment/EC2 metadata when available, and only fall back to unsigned requests when no credentials can be obtained from any source. --- src/common/storage/src/operator.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index e7e8adcd3961b..d5a5e31d4f18e 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -421,10 +421,11 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder = builder.disable_config_load().disable_ec2_metadata(); } - // If credential loading is disabled and no credentials are provided, use unsigned requests. - // This allows accessing public buckets reliably in environments where signing could be rejected. - if cfg.disable_credential_loader - && cfg.access_key_id.is_empty() + // Enable anonymous access when no explicit credentials are provided. + // This allows accessing public S3 buckets without requiring IAM permissions. + // OpenDAL will prefer credentials from environment/EC2 metadata when available, + // and fall back to unsigned requests only when no credentials can be obtained. + if cfg.access_key_id.is_empty() && cfg.secret_access_key.is_empty() && cfg.security_token.is_empty() && cfg.role_arn.is_empty() From 6c3f1da445334690488d8d3da64d71f5342813ee Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 19:32:43 +0800 Subject: [PATCH 06/10] fix(storage): restore unconditional allow_anonymous and handle PermissionDenied Restore unconditional allow_anonymous() in S3 operator initialization. This enables accessing public S3 buckets without explicit credentials. OpenDAL will prefer credentials when available and fall back to anonymous access only when needed. Also update check_operator() to accept PermissionDenied as a valid response. When allow_anonymous() is enabled and anonymous stat is attempted on a private bucket, it returns 403 instead of 404 for non-existent files. This is expected behavior - it just means the bucket exists but anonymous access is denied. Remove the unreliable external S3 test that depends on the wizardbend bucket being accessible from CI environments. --- src/common/storage/src/operator.rs | 21 +++++++------------ .../05_0016_ddl_stage_public_s3_list.test | 11 ---------- 2 files changed, 7 insertions(+), 25 deletions(-) delete mode 100644 tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index d5a5e31d4f18e..6851788208fd2 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -404,8 +404,9 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { .session_token(&cfg.security_token) .role_arn(&cfg.role_arn) .external_id(&cfg.external_id) - // Don't enable it otherwise we will get Permission in stat unknown files - // .allow_anonymous() + // It's safe to allow anonymous since opendal will prefer credentials when available. + // This enables accessing public buckets without explicit credentials. + .allow_anonymous() // Root. .root(&cfg.root); @@ -421,18 +422,6 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder = builder.disable_config_load().disable_ec2_metadata(); } - // Enable anonymous access when no explicit credentials are provided. - // This allows accessing public S3 buckets without requiring IAM permissions. - // OpenDAL will prefer credentials from environment/EC2 metadata when available, - // and fall back to unsigned requests only when no credentials can be obtained. - if cfg.access_key_id.is_empty() - && cfg.secret_access_key.is_empty() - && cfg.security_token.is_empty() - && cfg.role_arn.is_empty() - { - builder = builder.allow_anonymous(); - } - // Enable virtual host style if cfg.enable_virtual_host_style { builder = builder.enable_virtual_host_style(); @@ -628,6 +617,10 @@ pub async fn check_operator( match res { Ok(_) => Ok(()), Err(e) if e.kind() == opendal::ErrorKind::NotFound => Ok(()), + // PermissionDenied on the checker file is acceptable - it just means + // anonymous access is denied on this bucket, which is expected for private buckets. + // The actual data access will use proper credentials. + Err(e) if e.kind() == opendal::ErrorKind::PermissionDenied => Ok(()), Err(e) => Err(e), } }) diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test b/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test deleted file mode 100644 index 2051036fc7539..0000000000000 --- a/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test +++ /dev/null @@ -1,11 +0,0 @@ -statement ok -DROP STAGE IF EXISTS wizardbend_tpch - -statement ok -CREATE OR REPLACE STAGE wizardbend_tpch URL='s3://wizardbend/TPC-H/1TB/customer/' - -statement ok -LIST @wizardbend_tpch - -statement ok -DROP STAGE IF EXISTS wizardbend_tpch From 20409968dbf35250c4fd9fa813652a67b4ae7e41 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 19:35:25 +0800 Subject: [PATCH 07/10] Revert to df005368: conditional allow_anonymous with disable_credential_loader --- src/common/storage/src/operator.rs | 20 ++++++++++++------- .../05_0016_ddl_stage_public_s3_list.test | 11 ++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 6851788208fd2..e7e8adcd3961b 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -404,9 +404,8 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { .session_token(&cfg.security_token) .role_arn(&cfg.role_arn) .external_id(&cfg.external_id) - // It's safe to allow anonymous since opendal will prefer credentials when available. - // This enables accessing public buckets without explicit credentials. - .allow_anonymous() + // Don't enable it otherwise we will get Permission in stat unknown files + // .allow_anonymous() // Root. .root(&cfg.root); @@ -422,6 +421,17 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder = builder.disable_config_load().disable_ec2_metadata(); } + // If credential loading is disabled and no credentials are provided, use unsigned requests. + // This allows accessing public buckets reliably in environments where signing could be rejected. + if cfg.disable_credential_loader + && cfg.access_key_id.is_empty() + && cfg.secret_access_key.is_empty() + && cfg.security_token.is_empty() + && cfg.role_arn.is_empty() + { + builder = builder.allow_anonymous(); + } + // Enable virtual host style if cfg.enable_virtual_host_style { builder = builder.enable_virtual_host_style(); @@ -617,10 +627,6 @@ pub async fn check_operator( match res { Ok(_) => Ok(()), Err(e) if e.kind() == opendal::ErrorKind::NotFound => Ok(()), - // PermissionDenied on the checker file is acceptable - it just means - // anonymous access is denied on this bucket, which is expected for private buckets. - // The actual data access will use proper credentials. - Err(e) if e.kind() == opendal::ErrorKind::PermissionDenied => Ok(()), Err(e) => Err(e), } }) diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test b/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test new file mode 100644 index 0000000000000..2051036fc7539 --- /dev/null +++ b/tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test @@ -0,0 +1,11 @@ +statement ok +DROP STAGE IF EXISTS wizardbend_tpch + +statement ok +CREATE OR REPLACE STAGE wizardbend_tpch URL='s3://wizardbend/TPC-H/1TB/customer/' + +statement ok +LIST @wizardbend_tpch + +statement ok +DROP STAGE IF EXISTS wizardbend_tpch From 3f6b6d7ba305776c93243c770b43dca36aae2547 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 19:46:11 +0800 Subject: [PATCH 08/10] ci(sqllogic): run public S3 stage test with insecure off --- .../test_sqllogic_standalone_linux/action.yml | 3 ++- .github/workflows/reuse.sqllogic.yml | 17 +++++++++++++ scripts/ci/ci-run-sqllogic-tests-public-s3.sh | 24 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100755 scripts/ci/ci-run-sqllogic-tests-public-s3.sh diff --git a/.github/actions/test_sqllogic_standalone_linux/action.yml b/.github/actions/test_sqllogic_standalone_linux/action.yml index d3748c53e44a3..d720dbb6fa5f1 100644 --- a/.github/actions/test_sqllogic_standalone_linux/action.yml +++ b/.github/actions/test_sqllogic_standalone_linux/action.yml @@ -35,6 +35,7 @@ runs: env: TEST_HANDLERS: ${{ inputs.handlers }} TEST_PARALLEL: ${{ inputs.parallel }} + TEST_EXT_ARGS: ${{ inputs.dirs == 'base' && '--skip_file 05_0016_ddl_stage_public_s3_list.test' || '' }} CACHE_ENABLE_TABLE_META_CACHE: ${{ inputs.enable_table_meta_cache}} run: bash ./scripts/ci/ci-run-sqllogic-tests.sh ${{ inputs.dirs }} @@ -44,6 +45,6 @@ runs: env: TEST_HANDLERS: ${{ inputs.handlers }} TEST_PARALLEL: ${{ inputs.parallel }} - TEST_EXT_ARGS: '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test' + TEST_EXT_ARGS: ${{ inputs.dirs == 'base' && '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test,05_0016_ddl_stage_public_s3_list.test' || '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test' }} CACHE_ENABLE_TABLE_META_CACHE: ${{ inputs.enable_table_meta_cache}} run: bash ./scripts/ci/ci-run-sqllogic-tests-native.sh ${{ inputs.dirs }} diff --git a/.github/workflows/reuse.sqllogic.yml b/.github/workflows/reuse.sqllogic.yml index c189994243116..6eceb1ec0b767 100644 --- a/.github/workflows/reuse.sqllogic.yml +++ b/.github/workflows/reuse.sqllogic.yml @@ -81,6 +81,23 @@ jobs: with: name: test-sqllogic-standalone-${{ matrix.tests.dirs }}-${{ matrix.handler }} + public_s3_stage_list: + runs-on: + - self-hosted + - ${{ inputs.runner_arch }} + - Linux + - 2c8g + - "${{ inputs.runner_provider }}" + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup_test + with: + artifacts: sqllogictests,meta,query + - name: Run public S3 stage list test (insecure disabled) + timeout-minutes: 10 + shell: bash + run: bash ./scripts/ci/ci-run-sqllogic-tests-public-s3.sh + standalone_udf_server: runs-on: - self-hosted diff --git a/scripts/ci/ci-run-sqllogic-tests-public-s3.sh b/scripts/ci/ci-run-sqllogic-tests-public-s3.sh new file mode 100755 index 0000000000000..de595e27902b9 --- /dev/null +++ b/scripts/ci/ci-run-sqllogic-tests-public-s3.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright 2020-2021 The Databend Authors. +# SPDX-License-Identifier: Apache-2.0. + +set -e + +# Run this test with insecure disabled so external S3 stages don't load credentials +# from the environment and can access public buckets via unsigned requests. +export STORAGE_ALLOW_INSECURE=false + +echo "Starting standalone DatabendQuery and DatabendMeta" +./scripts/ci/deploy/databend-query-standalone.sh + +TEST_HANDLERS=${TEST_HANDLERS:-"http"} +BUILD_PROFILE=${BUILD_PROFILE:-debug} + +echo "Starting databend-sqllogic tests" +target/${BUILD_PROFILE}/databend-sqllogictests \ + --handlers ${TEST_HANDLERS} \ + --run_file tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test \ + --enable_sandbox \ + --parallel 1 \ + ${TEST_EXT_ARGS} + From 683303d413158d7f9a1a36742ceb5bb2df6e4dc1 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 20:22:54 +0800 Subject: [PATCH 09/10] fix(stage): avoid loading creds for external S3 stage --- .../test_sqllogic_standalone_linux/action.yml | 3 +-- .github/workflows/reuse.sqllogic.yml | 17 ------------- scripts/ci/ci-run-sqllogic-tests-public-s3.sh | 24 ------------------- src/query/sql/src/planner/binder/ddl/stage.rs | 12 ++++++++++ 4 files changed, 13 insertions(+), 43 deletions(-) delete mode 100755 scripts/ci/ci-run-sqllogic-tests-public-s3.sh diff --git a/.github/actions/test_sqllogic_standalone_linux/action.yml b/.github/actions/test_sqllogic_standalone_linux/action.yml index d720dbb6fa5f1..d3748c53e44a3 100644 --- a/.github/actions/test_sqllogic_standalone_linux/action.yml +++ b/.github/actions/test_sqllogic_standalone_linux/action.yml @@ -35,7 +35,6 @@ runs: env: TEST_HANDLERS: ${{ inputs.handlers }} TEST_PARALLEL: ${{ inputs.parallel }} - TEST_EXT_ARGS: ${{ inputs.dirs == 'base' && '--skip_file 05_0016_ddl_stage_public_s3_list.test' || '' }} CACHE_ENABLE_TABLE_META_CACHE: ${{ inputs.enable_table_meta_cache}} run: bash ./scripts/ci/ci-run-sqllogic-tests.sh ${{ inputs.dirs }} @@ -45,6 +44,6 @@ runs: env: TEST_HANDLERS: ${{ inputs.handlers }} TEST_PARALLEL: ${{ inputs.parallel }} - TEST_EXT_ARGS: ${{ inputs.dirs == 'base' && '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test,05_0016_ddl_stage_public_s3_list.test' || '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test' }} + TEST_EXT_ARGS: '--skip_file tpcds_spill_1.test,tpcds_spill_2.test,tpcds_spill_3.test' CACHE_ENABLE_TABLE_META_CACHE: ${{ inputs.enable_table_meta_cache}} run: bash ./scripts/ci/ci-run-sqllogic-tests-native.sh ${{ inputs.dirs }} diff --git a/.github/workflows/reuse.sqllogic.yml b/.github/workflows/reuse.sqllogic.yml index 6eceb1ec0b767..c189994243116 100644 --- a/.github/workflows/reuse.sqllogic.yml +++ b/.github/workflows/reuse.sqllogic.yml @@ -81,23 +81,6 @@ jobs: with: name: test-sqllogic-standalone-${{ matrix.tests.dirs }}-${{ matrix.handler }} - public_s3_stage_list: - runs-on: - - self-hosted - - ${{ inputs.runner_arch }} - - Linux - - 2c8g - - "${{ inputs.runner_provider }}" - steps: - - uses: actions/checkout@v4 - - uses: ./.github/actions/setup_test - with: - artifacts: sqllogictests,meta,query - - name: Run public S3 stage list test (insecure disabled) - timeout-minutes: 10 - shell: bash - run: bash ./scripts/ci/ci-run-sqllogic-tests-public-s3.sh - standalone_udf_server: runs-on: - self-hosted diff --git a/scripts/ci/ci-run-sqllogic-tests-public-s3.sh b/scripts/ci/ci-run-sqllogic-tests-public-s3.sh deleted file mode 100755 index de595e27902b9..0000000000000 --- a/scripts/ci/ci-run-sqllogic-tests-public-s3.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# Copyright 2020-2021 The Databend Authors. -# SPDX-License-Identifier: Apache-2.0. - -set -e - -# Run this test with insecure disabled so external S3 stages don't load credentials -# from the environment and can access public buckets via unsigned requests. -export STORAGE_ALLOW_INSECURE=false - -echo "Starting standalone DatabendQuery and DatabendMeta" -./scripts/ci/deploy/databend-query-standalone.sh - -TEST_HANDLERS=${TEST_HANDLERS:-"http"} -BUILD_PROFILE=${BUILD_PROFILE:-debug} - -echo "Starting databend-sqllogic tests" -target/${BUILD_PROFILE}/databend-sqllogictests \ - --handlers ${TEST_HANDLERS} \ - --run_file tests/sqllogictests/suites/base/05_ddl/05_0016_ddl_stage_public_s3_list.test \ - --enable_sandbox \ - --parallel 1 \ - ${TEST_EXT_ARGS} - diff --git a/src/query/sql/src/planner/binder/ddl/stage.rs b/src/query/sql/src/planner/binder/ddl/stage.rs index 3b726db57c3fd..230272c0e6fb8 100644 --- a/src/query/sql/src/planner/binder/ddl/stage.rs +++ b/src/query/sql/src/planner/binder/ddl/stage.rs @@ -20,6 +20,7 @@ use databend_common_exception::Result; use databend_common_meta_app::principal::FileFormatOptionsReader; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::StageInfo; +use databend_common_meta_app::storage::StorageParams; use databend_common_storage::init_operator; use super::super::copy_into_table::resolve_stage_location; @@ -89,6 +90,17 @@ impl Binder { ) .await?; + // External S3 stages don't load credentials by default; `role_arn` opts into assume-role. + let stage_storage = match stage_storage { + StorageParams::S3(mut cfg) => { + if cfg.role_arn.is_empty() { + cfg.disable_credential_loader = true; + } + StorageParams::S3(cfg) + } + v => v, + }; + // Check the storage params via init operator. let _ = init_operator(&stage_storage).map_err(|err| { ErrorCode::InvalidConfig(format!( From 83a662e247eed4c96ac8442f950ddf2b59b60f00 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 23 Dec 2025 20:34:35 +0800 Subject: [PATCH 10/10] fix(stage): normalize external S3 stage creds --- src/common/storage/src/stage.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs index 6249648de87b9..5a9c1137fb004 100644 --- a/src/common/storage/src/stage.rs +++ b/src/common/storage/src/stage.rs @@ -24,6 +24,7 @@ use databend_common_exception::Result; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::StageType; use databend_common_meta_app::principal::UserIdentity; +use databend_common_meta_app::storage::StorageParams; use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; @@ -89,7 +90,18 @@ impl StageFileInfo { pub fn init_stage_operator(stage_info: &StageInfo) -> Result { if stage_info.stage_type == StageType::External { - Ok(init_operator(&stage_info.stage_params.storage)?) + // External S3 stages don't load credentials by default; `role_arn` opts into assume-role. + let storage = match stage_info.stage_params.storage.clone() { + StorageParams::S3(mut cfg) => { + if cfg.role_arn.is_empty() { + cfg.disable_credential_loader = true; + } + StorageParams::S3(cfg) + } + v => v, + }; + + Ok(init_operator(&storage)?) } else { let stage_prefix = stage_info.stage_prefix(); let param = DataOperator::instance()