diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml index ccdefef0..833595fe 100644 --- a/.github/workflows/CloudTesting.yml +++ b/.github/workflows/CloudTesting.yml @@ -57,4 +57,4 @@ jobs: AWS_DEFAULT_REGION: ${{secrets.S3_ICEBERG_TEST_USER_REGION}} ICEBERG_AWS_REMOTE_AVAILABLE: 1 run: | - make test_release \ No newline at end of file + make test_release diff --git a/.github/workflows/CodeQuality.yml b/.github/workflows/CodeQuality.yml index db65f565..8918d414 100644 --- a/.github/workflows/CodeQuality.yml +++ b/.github/workflows/CodeQuality.yml @@ -44,6 +44,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: 'true' - name: Install shell: bash @@ -61,8 +62,3 @@ jobs: black --version make format-check-silent - - name: Generated Check - shell: bash - run: | - make generate-files - git diff --exit-code \ No newline at end of file diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 7aabeb11..4bb021b3 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -27,16 +27,48 @@ jobs: sudo apt-get install -y -qq software-properties-common sudo add-apt-repository ppa:git-core/ppa sudo apt-get update -y -qq - sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client + sudo apt-get install -y -qq \ + ninja-build \ + make gcc-multilib \ + g++-multilib \ + libssl-dev \ + wget \ + openjdk-8-jdk \ + zip \ + maven \ + unixodbc-dev \ + libc6-dev-i386 \ + lib32readline6-dev \ + libssl-dev \ + libcurl4-gnutls-dev \ + libexpat1-dev \ + gettext \ + unzip \ + build-essential \ + checkinstall \ + libffi-dev \ + curl \ + libz-dev \ + openssh-client sudo apt-get install -y -qq tar pkg-config sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose sudo chmod +x /usr/local/bin/docker-compose + - name: Install CMake 3.x + run: | + sudo apt-get remove -y cmake cmake-data + sudo apt-get install --allow-downgrades -y -qq 'cmake=3.*' 'cmake-data=3.*' + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' + - name: Check installed versions + run: | + ninja --version + cmake --version + - name: Setup vcpkg uses: lukka/run-vcpkg@v11.1 with: diff --git a/.github/workflows/PolarisTesting.yml b/.github/workflows/PolarisTesting.yml new file mode 100644 index 00000000..66ee86ce --- /dev/null +++ b/.github/workflows/PolarisTesting.yml @@ -0,0 +1,135 @@ +name: Local Polaris Testing +on: [push, pull_request,repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +env: + BASE_BRANCH: ${{ github.base_ref || (endsWith(github.ref, '_feature') && 'feature' || 'main') }} + CMAKE_POLICY_VERSION_MINIMUM: 3.5 + +jobs: + rest: + name: Test against Polaris Catalog + runs-on: ubuntu-latest + env: + VCPKG_TARGET_TRIPLET: 'x64-linux' + GEN: ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + PIP_BREAK_SYSTEM_PACKAGES: 1 + + steps: + - name: Install required ubuntu packages + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq 
software-properties-common + sudo add-apt-repository ppa:git-core/ppa + sudo apt-get update -y -qq + sudo apt-get install -y -qq \ + ninja-build \ + make gcc-multilib \ + g++-multilib \ + libssl-dev \ + wget \ + openjdk-8-jdk \ + zip \ + maven \ + unixodbc-dev \ + libc6-dev-i386 \ + lib32readline6-dev \ + libssl-dev \ + libcurl4-gnutls-dev \ + libexpat1-dev \ + gettext \ + unzip \ + build-essential \ + checkinstall \ + libffi-dev \ + curl \ + libz-dev \ + openssh-client + sudo apt-get install -y -qq tar pkg-config + sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose + + - name: Install CMake 3.x + run: | + sudo apt-get remove -y cmake cmake-data + sudo apt-get install --allow-downgrades -y -qq 'cmake=3.*' 'cmake-data=3.*' + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + continue-on-error: true + + - name: Build extension + env: + GEN: ninja + STATIC_LIBCPP: 1 + run: | + make release + + - name: Set up for Polaris + run: | + # install java + sudo apt install -y -qq openjdk-21-jre-headless + sudo apt install -y -qq openjdk-21-jdk-headless + # install python virtual environment (is this needed?) + sudo apt-get install -y -qq python3-venv + + - name: Wait for polaris initialization + env: + JAVA_HOME: /usr/lib/jvm/java-21-openjdk-amd64 + run: | + make setup_polaris_ci + # let polaris initialize + max_attempts=50 + attempt=1 + while ! (curl -sf http://localhost:8182/healthcheck || curl -sf http://localhost:8182/q/health); do + if [ $attempt -gt $max_attempts ]; then + echo "Polaris failed to initialize after $max_attempts attempts" + exit 1 + fi + echo "Waiting for Polaris to initialize (attempt $attempt/$max_attempts)..." + sleep 5 + attempt=$((attempt + 1)) + done + echo "Polaris is healthy" + + - name: Generate Polaris Data + run: | + python3 -m venv . + source ./bin/activate + python3 -m pip install poetry + python3 -m pip install pyspark==3.5.0 + python3 -m pip install duckdb + python3 scripts/polaris/get_polaris_root_creds.py + # needed for setup_polaris_catalog.sh + export POLARIS_ROOT_ID=$(cat polaris_root_id.txt) + export POLARIS_ROOT_SECRET=$(cat polaris_root_password.txt) + cd polaris_catalog && ../scripts/polaris/setup_polaris_catalog.sh > user_credentials.json + cd .. 
+ python3 scripts/polaris/get_polaris_client_creds.py + export POLARIS_CLIENT_ID=$(cat polaris_client_id.txt) + export POLARIS_CLIENT_SECRET=$(cat polaris_client_secret.txt) + python3 scripts/data_generators/generate_data.py polaris + + - name: Test with rest catalog + env: + POLARIS_SERVER_AVAILABLE: 1 + run: | + export POLARIS_CLIENT_ID=$(cat polaris_client_id.txt) + export POLARIS_CLIENT_SECRET=$(cat polaris_client_secret.txt) + make test_release \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f23c448f..a1eae37b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.5...3.29) # Set extension name here set(TARGET_NAME iceberg) @@ -16,7 +16,7 @@ set(EXTENSION_SOURCES src/iceberg_manifest.cpp src/manifest_reader.cpp src/catalog_api.cpp - src/catalog_utils.cpp + src/catalog_utils.cpp src/common/utils.cpp src/common/url_utils.cpp src/common/schema.cpp @@ -34,8 +34,7 @@ set(EXTENSION_SOURCES src/storage/irc_table_entry.cpp src/storage/irc_table_set.cpp src/storage/irc_transaction.cpp - src/storage/irc_transaction_manager.cpp -) + src/storage/irc_transaction_manager.cpp) add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) @@ -46,14 +45,20 @@ find_package(CURL REQUIRED) find_package(AWSSDK REQUIRED COMPONENTS core sso sts) include_directories(${CURL_INCLUDE_DIRS}) +# Reset the TARGET_NAME, the AWS find_package build could bleed into our build - overriding `TARGET_NAME` +set(TARGET_NAME iceberg) + # AWS SDK FROM vcpkg -target_include_directories(${EXTENSION_NAME} PUBLIC $) +target_include_directories(${EXTENSION_NAME} + PUBLIC $) target_link_libraries(${EXTENSION_NAME} PUBLIC ${AWSSDK_LINK_LIBRARIES}) -target_include_directories(${TARGET_NAME}_loadable_extension PRIVATE $) -target_link_libraries(${TARGET_NAME}_loadable_extension ${AWSSDK_LINK_LIBRARIES}) +target_include_directories(${TARGET_NAME}_loadable_extension + PRIVATE $) +target_link_libraries(${TARGET_NAME}_loadable_extension + ${AWSSDK_LINK_LIBRARIES}) # Link dependencies into extension -target_link_libraries(${EXTENSION_NAME} PUBLIC ${CURL_LIBRARIES}) +target_link_libraries(${EXTENSION_NAME} PUBLIC ${CURL_LIBRARIES}) target_link_libraries(${TARGET_NAME}_loadable_extension ${CURL_LIBRARIES}) install( diff --git a/Makefile b/Makefile index 1a0dc7ad..1adf758e 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,18 @@ install_requirements: # Custom makefile targets data: data_clean start-rest-catalog - python3 scripts/data_generators/generate_data.py + python3 scripts/data_generators/generate_data.py spark-rest local data_large: data data_clean - python3 scripts/data_generators/generate_data.py + python3 scripts/data_generators/generate_data.py spark-rest local + +# setup polaris server. See PolarisTesting.yml to see instructions for a specific machine. 
+setup_polaris_ci: + mkdir polaris_catalog + git clone https://github.com/apache/polaris.git polaris_catalog + cd polaris_catalog && ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache + cd polaris_catalog && ./gradlew --stop + cd polaris_catalog && nohup ./gradlew run > polaris-server.log 2> polaris-error.log & data_clean: rm -rf data/generated diff --git a/scripts/data_generators/generate_data.py b/scripts/data_generators/generate_data.py index 5375e63e..8ffcb1b6 100644 --- a/scripts/data_generators/generate_data.py +++ b/scripts/data_generators/generate_data.py @@ -1,17 +1,43 @@ from generate_spark_local.generate_iceberg_spark_local import IcebergSparkLocal from generate_spark_rest.generate_iceberg_spark_rest import IcebergSparkRest +from generate_polaris_rest.generate_iceberg_polaris_rest import IcebergPolarisRest +import sys -# Example usage: -if __name__ == "__main__": +def GenerateSparkRest(): db2 = IcebergSparkRest() conn2 = db2.GetConnection() db2.GenerateTables(conn2) db2.CloseConnection(conn2) del db2 del conn2 + +def GenerateSparkLocal(): db = IcebergSparkLocal() conn = db.GetConnection() db.GenerateTables(conn) db.CloseConnection(conn) del db del conn + +def GeneratePolarisData(): + db = IcebergPolarisRest() + conn = db.GetConnection() + db.GenerateTables(conn) + db.CloseConnection(conn) + del db + del conn + +if __name__ == "__main__": + argv = sys.argv + for i in range(1, len(argv)): + if argv[i] == "polaris": + print("generating polaris data") + GeneratePolarisData() + elif argv[i] == "local": + print("generating local iceberg data") + GenerateSparkLocal() + elif argv[i] == "spark-rest": + print("generating local iceberg REST data") + GenerateSparkRest() + else: + print(f"{argv[i]} not recognized, skipping") diff --git a/scripts/data_generators/generate_polaris_rest/__init__.py b/scripts/data_generators/generate_polaris_rest/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/scripts/data_generators/generate_polaris_rest/__init__.py @@ -0,0 +1 @@ + diff --git a/scripts/data_generators/generate_polaris_rest/generate_iceberg_polaris_rest.py b/scripts/data_generators/generate_polaris_rest/generate_iceberg_polaris_rest.py new file mode 100644 index 00000000..c548af16 --- /dev/null +++ b/scripts/data_generators/generate_polaris_rest/generate_iceberg_polaris_rest.py @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.sql import SparkSession + +#!/usr/bin/python3 +import pyspark +import pyspark.sql +import sys +import duckdb +import os +from pyspark import SparkContext +from pathlib import Path +import duckdb +import shutil + +DATA_GENERATION_DIR = f"./data/generated/iceberg/polaris-rest/" +SCRIPT_DIR = f"./scripts/data_generators/" +INTERMEDIATE_DATA = "./data/generated/intermediates/polaris-rest/" +PARQUET_SRC_FILE = f"scripts/data_generators/tmp_data/tmp.parquet" + + +class IcebergPolarisRest: + def __init__(self): + pass + + ### + ### Configure everyone's favorite apache product + ### + def GetConnection(self): + os.environ["PYSPARK_SUBMIT_ARGS"] = ( + "--packages org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 pyspark-shell" + ) + + client_id = os.getenv('POLARIS_CLIENT_ID', '') + client_secret = os.getenv('POLARIS_CLIENT_SECRET', '') + os.environ["AWS_REGION"] = "us-east-1" + os.environ["AWS_ACCESS_KEY_ID"] = "admin" + os.environ["AWS_SECRET_ACCESS_KEY"] = "password" + + if client_id == '' or client_secret == '': + print("could not find client id or client secret to connect to polaris, aborting") + return + + spark = ( + SparkSession.builder.config("spark.sql.catalog.quickstart_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") + .config( + "spark.jars.packages", + "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1,org.apache.hadoop:hadoop-aws:3.4.0,software.amazon.awssdk:bundle:2.23.19,software.amazon.awssdk:url-connection-client:2.23.19", + ) + .config('spark.sql.iceberg.vectorization.enabled', 'false') + # Configure the 'polaris' catalog as an Iceberg rest catalog + .config("spark.sql.catalog.quickstart_catalog.type", "rest") + .config("spark.sql.catalog.quickstart_catalog", "org.apache.iceberg.spark.SparkCatalog") + # Specify the rest catalog endpoint + .config("spark.sql.catalog.quickstart_catalog.uri", "http://localhost:8181/api/catalog") + # Enable token refresh + .config("spark.sql.catalog.quickstart_catalog.token-refresh-enabled", "true") + # specify the client_id:client_secret pair + .config("spark.sql.catalog.quickstart_catalog.credential", f"{client_id}:{client_secret}") + # Set the warehouse to the name of the catalog we created + .config("spark.sql.catalog.quickstart_catalog.warehouse", "quickstart_catalog") + # Scope set to PRINCIPAL_ROLE:ALL + .config("spark.sql.catalog.quickstart_catalog.scope", 'PRINCIPAL_ROLE:ALL') + # Enable access credential delegation + .config("spark.sql.catalog.quickstart_catalog.header.X-Iceberg-Access-Delegation", 'vended-credentials') + .config("spark.sql.catalog.quickstart_catalog.io-impl", "org.apache.iceberg.io.ResolvingFileIO") + .config("spark.sql.catalog.quickstart_catalog.s3.region", "us-west-2") + .config("spark.history.fs.logDirectory", "/home/iceberg/spark-events") + ).getOrCreate() + spark.sql("USE quickstart_catalog") + spark.sql("CREATE NAMESPACE IF NOT EXISTS default") + spark.sql("USE NAMESPACE default") + return spark + + def GetSQLFiles(self, table_dir): + sql_files = [f for f in os.listdir(table_dir) if f.endswith('.sql')] # Find .sql files + sql_files.sort() # Order matters obviously # Store results + return sql_files + + def GetTableDirs(self): + dir = "./scripts/data_generators/generate_polaris_rest/" + subdirectories = [d for d in os.listdir(dir) if os.path.isdir(dir + d) and d != "__pycache__"] + return subdirectories + + def GetSetupFile(self, dir): + setup_files = [f for f in os.listdir(dir) if 'setup' in f.lower()] + if len(setup_files) == 0: + 
return "" + return setup_files[0] + + def GenerateTPCH(self, con): + duckdb_con = duckdb.connect() + duckdb_con.execute("call dbgen(sf=1)") + + for tbl in ['lineitem', 'customer', 'nation', 'orders', 'part', 'partsupp', 'region', 'supplier']: + create_statement = f""" + CREATE or REPLACE TABLE default.{tbl} + TBLPROPERTIES ( + 'format-version'='2', + 'write.update.mode'='merge-on-read' + ) + AS SELECT * FROM parquet_file_view; + """ + duckdb_con.execute(f"copy {tbl} to '{PARQUET_SRC_FILE}' (FORMAT PARQUET)") + con.read.parquet(PARQUET_SRC_FILE).createOrReplaceTempView('parquet_file_view') + con.sql(create_statement) + + def GenerateTables(self, con): + # Generate the tpch tables + self.GenerateTPCH(con) + con.sql("CREATE NAMESPACE IF NOT EXISTS COLLADO_TEST") + con.sql("USE NAMESPACE COLLADO_TEST") + con.sql( + """ + CREATE TABLE IF NOT EXISTS quickstart_table ( + id BIGINT, data STRING + ) + USING ICEBERG + """ + ) + con.sql("INSERT INTO quickstart_table VALUES (1, 'some data'), (2, 'more data'), (3, 'yet more data')") + + + def CloseConnection(self, con): + del con diff --git a/scripts/data_generators/tmp_data/tmp.parquet b/scripts/data_generators/tmp_data/tmp.parquet index 3600bba4..eb5a4bbf 100644 Binary files a/scripts/data_generators/tmp_data/tmp.parquet and b/scripts/data_generators/tmp_data/tmp.parquet differ diff --git a/scripts/polaris/get_polaris_client_creds.py b/scripts/polaris/get_polaris_client_creds.py new file mode 100644 index 00000000..c940156f --- /dev/null +++ b/scripts/polaris/get_polaris_client_creds.py @@ -0,0 +1,27 @@ +import re +import os + +log_content = "" +# Read the log file (hopefully it isn't too big) +with open("polaris_catalog/user_credentials.json", "r") as file: + log_content = file.read() + +# Regular expression to capture the credentials +match = re.search(r"{\"clientId\": \"(\w+)\", \"clientSecret\": \"(\w+)\"}", log_content) + +if match: + clientId = match.group(1) + clientSecret = match.group(2) + if clientId and clientSecret: + # Write client_id and client_secret to separate files + with open("polaris_client_id.txt", "w") as id_file: + print(f"clientId {clientId}") + id_file.write(clientId) + + with open("polaris_client_secret.txt", "w") as secret_file: + print(f"clientSecret {clientSecret}") + secret_file.write(clientSecret) + +else: + print("Credentials not found in the log file.") + exit(1) diff --git a/scripts/polaris/get_polaris_root_creds.py b/scripts/polaris/get_polaris_root_creds.py new file mode 100644 index 00000000..d3d5f6ca --- /dev/null +++ b/scripts/polaris/get_polaris_root_creds.py @@ -0,0 +1,24 @@ +import re +import os + +# Read the log file (hopefully it isn't too big) +with open("polaris_catalog/polaris-server.log", "r") as file: + log_content = file.read() + +# Regular expression to capture the credentials +match = re.search(r"realm: POLARIS root principal credentials: (\w+):(\w+)", log_content) + +if match: + root_user = match.group(1) + root_password = match.group(2) + if root_user and root_password: + # Write client_id and client_secret to separate files + with open("polaris_root_id.txt", "w") as id_file: + id_file.write(root_user) + + with open("polaris_root_password.txt", "w") as secret_file: + secret_file.write(root_password) + +else: + print("Credentials not found in the log file.") + exit(1) diff --git a/scripts/polaris/setup_polaris_catalog.py b/scripts/polaris/setup_polaris_catalog.py new file mode 100644 index 00000000..65bcf525 --- /dev/null +++ b/scripts/polaris/setup_polaris_catalog.py @@ -0,0 +1,118 @@ +import os 
+from polaris.catalog.api.iceberg_catalog_api import IcebergCatalogAPI +from polaris.catalog.api.iceberg_o_auth2_api import IcebergOAuth2API +from polaris.catalog.api_client import ApiClient as CatalogApiClient +from polaris.catalog.api_client import Configuration as CatalogApiClientConfiguration + +# some of this is from https://github.com/apache/polaris/blob/e32ef89bb97642f2ac9a4db82252a4fcf7aa0039/getting-started/spark/notebooks/SparkPolaris.ipynb +polaris_credential = 'root:s3cr3t' # pragma: allowlist secret + +client_id = os.getenv('POLARIS_ROOT_ID', '') +client_secret = os.getenv('POLARIS_ROOT_SECRET', '') + +if client_id == '' or client_secret == '': + print("could not find polaris root id or polaris root secret") +client = CatalogApiClient( + CatalogApiClientConfiguration(username=client_id, password=client_secret, host='http://polaris:8181/api/catalog') +) + +oauth_api = IcebergOAuth2API(client) +token = oauth_api.get_token( + scope='PRINCIPAL_ROLE:ALL', + client_id=client_id, + client_secret=client_secret, + grant_type='client_credentials', + _headers={'realm': 'default-realm'}, +) + +# create a catalog +from polaris.management import * + +client = ApiClient(Configuration(access_token=token.access_token, host='http://polaris:8181/api/management/v1')) +root_client = PolarisDefaultApi(client) + +storage_conf = FileStorageConfigInfo(storage_type="FILE", allowed_locations=["file:///tmp"]) +catalog_name = 'polaris_demo' +catalog = Catalog( + name=catalog_name, + type='INTERNAL', + properties={"default-base-location": "file:///tmp/polaris/"}, + storage_config_info=storage_conf, +) +catalog.storage_config_info = storage_conf +root_client.create_catalog(create_catalog_request=CreateCatalogRequest(catalog=catalog)) +resp = root_client.get_catalog(catalog_name=catalog.name) + + +# UTILITY FUNCTIONS +# Creates a principal with the given name +def create_principal(api, principal_name): + principal = Principal(name=principal_name, type="SERVICE") + try: + principal_result = api.create_principal(CreatePrincipalRequest(principal=principal)) + return principal_result + except ApiException as e: + if e.status == 409: + return api.rotate_credentials(principal_name=principal_name) + else: + raise e + + +# Create a catalog role with the given name +def create_catalog_role(api, catalog, role_name): + catalog_role = CatalogRole(name=role_name) + try: + api.create_catalog_role( + catalog_name=catalog.name, create_catalog_role_request=CreateCatalogRoleRequest(catalog_role=catalog_role) + ) + return api.get_catalog_role(catalog_name=catalog.name, catalog_role_name=role_name) + except ApiException as e: + return api.get_catalog_role(catalog_name=catalog.name, catalog_role_name=role_name) + else: + raise e + + +# Create a principal role with the given name +def create_principal_role(api, role_name): + principal_role = PrincipalRole(name=role_name) + try: + api.create_principal_role(CreatePrincipalRoleRequest(principal_role=principal_role)) + return api.get_principal_role(principal_role_name=role_name) + except ApiException as e: + return api.get_principal_role(principal_role_name=role_name) + + +# Create a bunch of new roles + +# Create the engineer_principal +engineer_principal = create_principal(root_client, "collado") + +# Create the principal role +engineer_role = create_principal_role(root_client, "engineer") + +# Create the catalog role +manager_catalog_role = create_catalog_role(root_client, catalog, "manage_catalog") + +# Grant the catalog role to the principal role +# All principals in the 
principal role have the catalog role's privileges +root_client.assign_catalog_role_to_principal_role( + principal_role_name=engineer_role.name, + catalog_name=catalog.name, + grant_catalog_role_request=GrantCatalogRoleRequest(catalog_role=manager_catalog_role), +) + +# Assign privileges to the catalog role +# Here, we grant CATALOG_MANAGE_CONTENT +root_client.add_grant_to_catalog_role( + catalog.name, + manager_catalog_role.name, + AddGrantRequest( + grant=CatalogGrant(catalog_name=catalog.name, type='catalog', privilege=CatalogPrivilege.CATALOG_MANAGE_CONTENT) + ), +) + +# Assign the principal role to the principal +root_client.assign_principal_role( + engineer_principal.principal.name, + grant_principal_role_request=GrantPrincipalRoleRequest(principal_role=engineer_role), +) diff --git a/scripts/polaris/setup_polaris_catalog.sh b/scripts/polaris/setup_polaris_catalog.sh new file mode 100755 index 00000000..03864aed --- /dev/null +++ b/scripts/polaris/setup_polaris_catalog.sh @@ -0,0 +1,61 @@ +# TODO: use the python module to execute these steps. +# Seems like the python module is not yet publicly available/installable, so unsure what to do about this. + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + catalogs \ + create \ + --storage-type FILE \ + --default-base-location file://${PWD}/storage_files \ + quickstart_catalog + +\ +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + principals \ + create \ + quickstart_user + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + principal-roles \ + create \ + quickstart_user_role + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + catalog-roles \ + create \ + --catalog quickstart_catalog \ + quickstart_catalog_role + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + principal-roles \ + grant \ + --principal quickstart_user \ + quickstart_user_role + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + catalog-roles \ + grant \ + --catalog quickstart_catalog \ + --principal-role quickstart_user_role \ + quickstart_catalog_role + +./polaris \ + --client-id ${POLARIS_ROOT_ID} \ + --client-secret ${POLARIS_ROOT_SECRET} \ + privileges \ + catalog \ + grant \ + --catalog quickstart_catalog \ + --catalog-role quickstart_catalog_role \ + CATALOG_MANAGE_CONTENT diff --git a/scripts/requirements-polaris.txt b/scripts/requirements-polaris.txt new file mode 100644 index 00000000..af385d6b --- /dev/null +++ b/scripts/requirements-polaris.txt @@ -0,0 +1,2 @@ +pydantic==2.11.1 +pyspark==3.5.0 \ No newline at end of file diff --git a/src/catalog_api.cpp b/src/catalog_api.cpp index 9c8c9fde..115a74a4 100644 --- a/src/catalog_api.cpp +++ b/src/catalog_api.cpp @@ -39,7 +39,6 @@ static string GetTableMetadata(ClientContext &context, IRCatalog &catalog, const static string GetTableMetadataCached(ClientContext &context, IRCatalog &catalog, const string &schema, const string &table, const string &secret_name) { - struct curl_slist *extra_headers = NULL; auto url = catalog.GetBaseUrl(); url.AddPathComponent(catalog.prefix); url.AddPathComponent("namespaces"); @@ -71,35 +70,152 @@ static IRCAPIColumnDefinition ParseColumnDefinition(yyjson_val *column_def) { return result; } +static void ParseConfigOptions(yyjson_val *config, case_insensitive_map_t &options) { + //! 
Set of recognized config parameters and the duckdb secret option that matches it. + static const case_insensitive_map_t config_to_option = {{"s3.access-key-id", "key_id"}, + {"s3.secret-access-key", "secret"}, + {"s3.session-token", "session_token"}, + {"s3.region", "region"}, + {"s3.endpoint", "endpoint"}}; + + auto config_size = yyjson_obj_size(config); + if (!config || config_size == 0) { + return; + } + for (auto &it : config_to_option) { + auto &key = it.first; + auto &option = it.second; + + auto *item = yyjson_obj_get(config, key.c_str()); + if (item) { + options[option] = yyjson_get_str(item); + } + } + auto *access_style = yyjson_obj_get(config, "s3.path-style-access"); + if (access_style) { + string value = yyjson_get_str(access_style); + bool path_style; + if (value == "true") { + path_style = true; + } else if (value == "false") { + path_style = false; + } else { + throw InvalidInputException("Unexpected value ('%s') for 's3.path-style-access' in 'config' property", + value); + } + options["use_ssl"] = Value(!path_style); + if (path_style) { + options["url_style"] = "path"; + } + } + + auto endpoint_it = options.find("endpoint"); + if (endpoint_it == options.end()) { + return; + } + auto endpoint = endpoint_it->second.ToString(); + if (StringUtil::StartsWith(endpoint, "http://")) { + endpoint = endpoint.substr(7, std::string::npos); + } + if (StringUtil::EndsWith(endpoint, "/")) { + endpoint = endpoint.substr(0, endpoint.size() - 1); + } + endpoint_it->second = endpoint; +} + IRCAPITableCredentials IRCAPI::GetTableCredentials(ClientContext &context, IRCatalog &catalog, const string &schema, - const string &table, IRCCredentials credentials) { + const string &table, const string &secret_base_name) { IRCAPITableCredentials result; string api_result = GetTableMetadataCached(context, catalog, schema, table, catalog.secret_name); std::unique_ptr doc(ICUtils::api_result_to_doc(api_result)); auto *root = yyjson_doc_get_root(doc.get()); - auto *warehouse_credentials = yyjson_obj_get(root, "config"); - auto credential_size = yyjson_obj_size(warehouse_credentials); auto catalog_credentials = IRCatalog::GetSecret(context, catalog.secret_name); - if (warehouse_credentials && credential_size > 0) { - result.key_id = IcebergUtils::TryGetStrFromObject(warehouse_credentials, "s3.access-key-id", false); - result.secret = IcebergUtils::TryGetStrFromObject(warehouse_credentials, "s3.secret-access-key", false); - result.session_token = IcebergUtils::TryGetStrFromObject(warehouse_credentials, "s3.session-token", false); - if (catalog_credentials) { - auto kv_secret = dynamic_cast(*catalog_credentials->secret); - auto region = kv_secret.TryGetValue("region").ToString(); - result.region = region; + + // Mapping from config key to a duckdb secret option + + case_insensitive_map_t config_options; + auto *config_val = yyjson_obj_get(root, "config"); + if (config_val && catalog_credentials) { + auto kv_secret = dynamic_cast(*catalog_credentials->secret); + for (auto &option : kv_secret.secret_map) { + config_options.emplace(option); + } + } + ParseConfigOptions(config_val, config_options); + + auto *storage_credentials = yyjson_obj_get(root, "storage-credentials"); + auto storage_credentials_size = yyjson_arr_size(storage_credentials); + if (storage_credentials && storage_credentials_size > 0) { + yyjson_val *storage_credential; + size_t index, max; + yyjson_arr_foreach(storage_credentials, index, max, storage_credential) { + auto *sc_prefix = yyjson_obj_get(storage_credential, "prefix"); + if 
(!sc_prefix) { + throw InvalidInputException("required property 'prefix' is missing from the StorageCredential schema"); + } + + CreateSecretInfo create_secret_info(OnCreateConflict::REPLACE_ON_CONFLICT, SecretPersistType::TEMPORARY); + auto prefix_string = yyjson_get_str(sc_prefix); + if (!prefix_string) { + throw InvalidInputException("property 'prefix' of StorageCredential is NULL"); + } + create_secret_info.scope.push_back(string(prefix_string)); + create_secret_info.name = StringUtil::Format("%s_%d_%s", secret_base_name, index, prefix_string); + create_secret_info.type = "s3"; + create_secret_info.provider = "config"; + create_secret_info.storage_type = "memory"; + create_secret_info.options = config_options; + + auto *sc_config = yyjson_obj_get(storage_credential, "config"); + ParseConfigOptions(sc_config, create_secret_info.options); + result.storage_credentials.push_back(create_secret_info); } } + + if (result.storage_credentials.empty() && !config_options.empty()) { + //! Only create a secret out of the 'config' if there are no 'storage-credentials' + result.config = + make_uniq(OnCreateConflict::REPLACE_ON_CONFLICT, SecretPersistType::TEMPORARY); + auto &config = *result.config; + config.options = config_options; + config.name = secret_base_name; + config.type = "s3"; + config.provider = "config"; + config.storage_type = "memory"; + } + return result; } -string IRCAPI::GetToken(ClientContext &context, string id, string secret, string endpoint) { - string post_data = - "grant_type=client_credentials&client_id=" + id + "&client_secret=" + secret + "&scope=PRINCIPAL_ROLE:ALL"; - string api_result = APIUtils::PostRequest(context, endpoint + "/v1/oauth/tokens", post_data); +string IRCAPI::GetToken(ClientContext &context, const string &uri, const string &id, const string &secret, + const string &endpoint, const string &scope) { + vector parameters; + parameters.push_back(StringUtil::Format("%s=%s", "grant_type", "client_credentials")); + parameters.push_back(StringUtil::Format("%s=%s", "client_id", id)); + parameters.push_back(StringUtil::Format("%s=%s", "client_secret", secret)); + parameters.push_back(StringUtil::Format("%s=%s", "scope", scope)); + + string post_data = StringUtil::Format("%s", StringUtil::Join(parameters, "&")); + string api_result = APIUtils::PostRequest(context, uri, post_data); + //! 
FIXME: the oauth/tokens endpoint returns, on success; + // { 'access_token', 'token_type', 'expires_in', , 'refresh_token', 'scope'} std::unique_ptr doc(ICUtils::api_result_to_doc(api_result)); auto *root = yyjson_doc_get_root(doc.get()); - return IcebergUtils::TryGetStrFromObject(root, "access_token"); + auto access_token_val = yyjson_obj_get(root, "access_token"); + auto token_type_val = yyjson_obj_get(root, "token_type"); + if (!access_token_val) { + throw IOException("OAuthTokenResponse is missing required property 'access_token'"); + } + if (!token_type_val) { + throw IOException("OAuthTokenResponse is missing required property 'token_type'"); + } + string token_type = yyjson_get_str(token_type_val); + if (!StringUtil::CIEquals(token_type, "bearer")) { + throw NotImplementedException( + "token_type return value '%s' is not supported, only supports 'bearer' currently.", token_type); + } + string access_token = yyjson_get_str(access_token_val); + return access_token; } static void populateTableMetadata(IRCAPITable &table, yyjson_val *metadata_root) { @@ -127,7 +243,7 @@ static void populateTableMetadata(IRCAPITable &table, yyjson_val *metadata_root) } if (!found) { - throw InternalException("Current schema not found"); + throw InvalidInputException("Current schema not found"); } } @@ -195,6 +311,7 @@ vector IRCAPI::GetSchemas(ClientContext &context, IRCatalog &catal string api_result = APIUtils::GetRequest(context, endpoint_builder, catalog.secret_name, catalog.credentials.token); std::unique_ptr doc(ICUtils::api_result_to_doc(api_result)); auto *root = yyjson_doc_get_root(doc.get()); + //! 'ListNamespacesResponse' auto *schemas = yyjson_obj_get(root, "namespaces"); size_t idx, max; yyjson_val *schema; diff --git a/src/catalog_utils.cpp b/src/catalog_utils.cpp index 50e3dffe..166de528 100644 --- a/src/catalog_utils.cpp +++ b/src/catalog_utils.cpp @@ -50,7 +50,7 @@ string ICUtils::LogicalToIcebergType(const LogicalType &input) { break; } - throw std::runtime_error("Unsupported type: " + input.ToString()); + throw InvalidInputException("Unsupported type: %s", input.ToString()); } string ICUtils::TypeToString(const LogicalType &input) { @@ -240,7 +240,7 @@ yyjson_doc *ICUtils::api_result_to_doc(const string &api_result) { auto *error = yyjson_obj_get(root, "error"); if (error != NULL) { string err_msg = IcebergUtils::TryGetStrFromObject(error, "message"); - throw std::runtime_error(err_msg); + throw InvalidInputException(err_msg); } return doc; } diff --git a/src/common/api_utils.cpp b/src/common/api_utils.cpp index 41042217..723a1e82 100644 --- a/src/common/api_utils.cpp +++ b/src/common/api_utils.cpp @@ -1,7 +1,10 @@ #include "api_utils.hpp" -#include -#include "storage/irc_catalog.hpp" #include "credentials/credential_provider.hpp" +#include "duckdb/common/exception.hpp" +#include "duckdb/common/exception/http_exception.hpp" +#include "duckdb/common/string_util.hpp" +#include "storage/irc_catalog.hpp" +#include namespace duckdb { @@ -45,7 +48,7 @@ string APIUtils::GetRequest(ClientContext &context, const IRCEndpointBuilder &en curl_easy_strerror(res)); if (res != CURLcode::CURLE_OK) { string error = curl_easy_strerror(res); - throw IOException("Curl Request to '%s' failed with error: '%s'", url, error); + throw HTTPException(StringUtil::Format("Curl Request to '%s' failed with error: '%s'", url, error)); } return readBuffer; @@ -116,8 +119,9 @@ string APIUtils::GetRequestAws(ClientContext &context, IRCEndpointBuilder endpoi } else { Aws::StringStream resBody; resBody << 
res->GetResponseBody().rdbuf(); - throw IOException("Failed to query %s, http error %d thrown. Message: %s", req->GetUri().GetURIString(true), - res->GetResponseCode(), resBody.str()); + throw HTTPException(StringUtil::Format("Failed to query %s, http error %d thrown. Message: %s", + req->GetUri().GetURIString(true), res->GetResponseCode(), + resBody.str())); } } @@ -177,7 +181,7 @@ string APIUtils::DeleteRequest(const string &url, const string &token, curl_slis if (res != CURLcode::CURLE_OK) { string error = curl_easy_strerror(res); - throw IOException("Curl DELETE Request to '%s' failed with error: '%s'", url, error); + throw HTTPException(StringUtil::Format("Curl DELETE Request to '%s' failed with error: '%s'", url, error)); } return readBuffer; @@ -227,7 +231,7 @@ string APIUtils::PostRequest(ClientContext &context, const string &url, const st curl_easy_strerror(res)); if (res != CURLcode::CURLE_OK) { string error = curl_easy_strerror(res); - throw IOException("Curl Request to '%s' failed with error: '%s'", url, error); + throw HTTPException(StringUtil::Format("Curl Request to '%s' failed with error: '%s'", url, error)); } return readBuffer; } diff --git a/src/common/iceberg.cpp b/src/common/iceberg.cpp index f1d3aecf..6c461b18 100644 --- a/src/common/iceberg.cpp +++ b/src/common/iceberg.cpp @@ -64,7 +64,7 @@ unique_ptr IcebergSnapshot::GetParseInfo(yyjson_doc &metadata } else { auto schema = yyjson_obj_get(root, "schema"); if (!schema) { - throw IOException("Neither a valid schema or schemas field was found"); + throw InvalidConfigurationException("Neither a valid schema or schemas field was found"); } auto found_schema_id = IcebergUtils::TryGetNumFromObject(schema, "schema-id"); info.schemas.push_back(schema); @@ -103,7 +103,7 @@ IcebergSnapshot IcebergSnapshot::GetSnapshotById(const string &path, FileSystem auto snapshot = FindSnapshotByIdInternal(info->snapshots, snapshot_id); if (!snapshot) { - throw IOException("Could not find snapshot with id " + to_string(snapshot_id)); + throw InvalidConfigurationException("Could not find snapshot with id " + to_string(snapshot_id)); } return ParseSnapShot(snapshot, info->iceberg_version, info->schema_id, info->schemas, options); @@ -115,7 +115,8 @@ IcebergSnapshot IcebergSnapshot::GetSnapshotByTimestamp(const string &path, File auto snapshot = FindSnapshotByIdTimestampInternal(info->snapshots, timestamp); if (!snapshot) { - throw IOException("Could not find latest snapshots for timestamp " + Timestamp::ToString(timestamp)); + throw InvalidConfigurationException("Could not find latest snapshots for timestamp " + + Timestamp::ToString(timestamp)); } return ParseSnapShot(snapshot, info->iceberg_version, info->schema_id, info->schemas, options); @@ -138,7 +139,7 @@ static string GenerateMetaDataUrl(FileSystem &fs, const string &meta_path, strin } } - throw IOException( + throw InvalidConfigurationException( "Iceberg metadata file not found for table version '%s' using '%s' compression and format(s): '%s'", table_version, options.metadata_compression_codec, options.version_name_format); } @@ -171,7 +172,7 @@ string IcebergSnapshot::GetMetaDataPath(ClientContext &context, const string &pa } if (!UnsafeVersionGuessingEnabled(context)) { // Make sure we're allowed to guess versions - throw IOException( + throw InvalidConfigurationException( "Failed to read iceberg table. 
No version was provided and no version-hint could be found, globbing the " "filesystem to locate the latest version is disabled by default as this is considered unsafe and could " "result in reading uncommitted data. To enable this use 'SET %s = true;'", @@ -183,7 +184,7 @@ string IcebergSnapshot::GetMetaDataPath(ClientContext &context, const string &pa } string IcebergSnapshot::ReadMetaData(const string &path, FileSystem &fs, const string &metadata_compression_codec) { - if (metadata_compression_codec == "gzip") { + if (metadata_compression_codec == "gzip" || StringUtil::EndsWith(path, "gz.metadata.json")) { return IcebergUtils::GzFileToString(path, fs); } return IcebergUtils::FileToString(path, fs); @@ -195,7 +196,7 @@ IcebergSnapshot IcebergSnapshot::ParseSnapShot(yyjson_val *snapshot, idx_t icebe if (snapshot) { auto snapshot_tag = yyjson_get_type(snapshot); if (snapshot_tag != YYJSON_TYPE_OBJ) { - throw IOException("Invalid snapshot field found parsing iceberg metadata.json"); + throw InvalidConfigurationException("Invalid snapshot field found parsing iceberg metadata.json"); } ret.metadata_compression_codec = options.metadata_compression_codec; if (iceberg_format_version == 1) { @@ -226,9 +227,9 @@ string IcebergSnapshot::GetTableVersionFromHint(const string &meta_path, FileSys try { return version_file_content; } catch (std::invalid_argument &e) { - throw IOException("Iceberg version hint file contains invalid value"); + throw InvalidConfigurationException("Iceberg version hint file contains invalid value"); } catch (std::out_of_range &e) { - throw IOException("Iceberg version hint file contains invalid value"); + throw InvalidConfigurationException("Iceberg version hint file contains invalid value"); } } @@ -262,8 +263,9 @@ string IcebergSnapshot::GuessTableVersion(const string &meta_path, FileSystem &f } } - throw IOException("Could not guess Iceberg table version using '%s' compression and format(s): '%s'", - metadata_compression_codec, version_format); + throw InvalidConfigurationException( + "Could not guess Iceberg table version using '%s' compression and format(s): '%s'", metadata_compression_codec, + version_format); } string IcebergSnapshot::PickTableVersion(vector &found_metadata, string &version_pattern, string &glob) { diff --git a/src/common/schema.cpp b/src/common/schema.cpp index fe800adf..1c2d715f 100644 --- a/src/common/schema.cpp +++ b/src/common/schema.cpp @@ -63,13 +63,13 @@ static LogicalType ParseComplexType(yyjson_val *type) { if (type_str == "map") { return ParseMap(type); } - throw IOException("Invalid field found while parsing field: type"); + throw InvalidConfigurationException("Invalid field found while parsing field: type"); } static LogicalType ParseType(yyjson_val *type) { auto val = yyjson_obj_get(type, "type"); if (!val) { - throw IOException("Invalid field found while parsing field: type"); + throw InvalidConfigurationException("Invalid field found while parsing field: type"); } return ParseTypeValue(val); } @@ -79,7 +79,7 @@ static LogicalType ParseTypeValue(yyjson_val *val) { return ParseComplexType(val); } if (yyjson_get_type(val) != YYJSON_TYPE_STR) { - throw IOException("Invalid field found while parsing field: type"); + throw InvalidConfigurationException("Invalid field found while parsing field: type"); } string type_str = yyjson_get_str(val); @@ -136,7 +136,7 @@ static LogicalType ParseTypeValue(yyjson_val *val) { auto scale = std::stoi(digits[1]); return LogicalType::DECIMAL(width, scale); } - throw IOException("Encountered an unrecognized 
type in JSON schema: \"%s\"", type_str); + throw InvalidConfigurationException("Encountered an unrecognized type in JSON schema: \"%s\"", type_str); } IcebergColumnDefinition IcebergColumnDefinition::ParseFromJson(yyjson_val *val) { @@ -155,7 +155,7 @@ static vector ParseSchemaFromJson(yyjson_val *schema_js // Assert that the top level 'type' is a struct auto type_str = IcebergUtils::TryGetStrFromObject(schema_json, "type"); if (type_str != "struct") { - throw IOException("Schema in JSON Metadata is invalid"); + throw InvalidConfigurationException("Schema in JSON Metadata is invalid"); } D_ASSERT(yyjson_get_type(schema_json) == YYJSON_TYPE_OBJ); D_ASSERT(IcebergUtils::TryGetStrFromObject(schema_json, "type") == "struct"); @@ -180,7 +180,7 @@ vector IcebergSnapshot::ParseSchema(vector IRCEndpointBuilder::GetParams() { +const std::unordered_map IRCEndpointBuilder::GetParams() { return params; } -std::string IRCEndpointBuilder::GetURL() const { - std::string ret = host; +string IRCEndpointBuilder::GetURL() const { + string ret = host; if (!version.empty()) { ret = ret + "/" + version; } diff --git a/src/common/utils.cpp b/src/common/utils.cpp index 49521dcb..71301358 100644 --- a/src/common/utils.cpp +++ b/src/common/utils.cpp @@ -32,13 +32,13 @@ string IcebergUtils::GetFullPath(const string &iceberg_path, const string &relat return fs.JoinPath(iceberg_path, relative_file_path.substr(found + 1)); } - throw IOException("Did not recognize iceberg path"); + throw InvalidConfigurationException("Did not recognize iceberg path"); } uint64_t IcebergUtils::TryGetNumFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); if (!val || yyjson_get_type(val) != YYJSON_TYPE_NUM) { - throw IOException("Invalid field found while parsing field: " + field); + throw InvalidConfigurationException("Invalid field found while parsing field: " + field); } return yyjson_get_uint(val); } @@ -46,7 +46,7 @@ uint64_t IcebergUtils::TryGetNumFromObject(yyjson_val *obj, const string &field) bool IcebergUtils::TryGetBoolFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); if (!val || yyjson_get_type(val) != YYJSON_TYPE_BOOL) { - throw IOException("Invalid field found while parsing field: " + field); + throw InvalidConfigurationException("Invalid field found while parsing field: " + field); } return yyjson_get_bool(val); } @@ -54,7 +54,7 @@ bool IcebergUtils::TryGetBoolFromObject(yyjson_val *obj, const string &field) { string IcebergUtils::TryGetStrFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); if (!val || yyjson_get_type(val) != YYJSON_TYPE_STR) { - throw IOException("Invalid field found while parsing field: " + field); + throw InvalidConfigurationException("Invalid field found while parsing field: " + field); } return yyjson_get_str(val); } @@ -67,7 +67,7 @@ static TYPE TemplatedTryGetYYJson(yyjson_val *obj, const string &field, TYPE def } else if (!fail_on_missing) { return default_val; } - throw IOException("Invalid field found while parsing field: " + field); + throw InvalidConfigurationException("Invalid field found while parsing field: " + field); } uint64_t IcebergUtils::TryGetNumFromObject(yyjson_val *obj, const string &field, bool fail_on_missing, diff --git a/src/iceberg_extension.cpp b/src/iceberg_extension.cpp index 7522e2ef..f4d5d350 100644 --- a/src/iceberg_extension.cpp +++ b/src/iceberg_extension.cpp @@ -31,7 +31,7 @@ static 
unique_ptr CreateCatalogSecretFunction(ClientContext &context auto lower_name = StringUtil::Lower(named_param.first); if (lower_name == "key_id" || lower_name == "secret" || lower_name == "endpoint" || - lower_name == "aws_region") { + lower_name == "aws_region" || lower_name == "oauth2_scope" || lower_name == "oauth2_server_uri") { result->secret_map[lower_name] = named_param.second.ToString(); } else { throw InternalException("Unknown named parameter passed to CreateIRCSecretFunction: " + lower_name); @@ -40,8 +40,9 @@ static unique_ptr CreateCatalogSecretFunction(ClientContext &context // Get token from catalog result->secret_map["token"] = - IRCAPI::GetToken(context, result->secret_map["key_id"].ToString(), result->secret_map["secret"].ToString(), - result->secret_map["endpoint"].ToString()); + IRCAPI::GetToken(context, result->secret_map["oauth2_server_uri"].ToString(), + result->secret_map["key_id"].ToString(), result->secret_map["secret"].ToString(), + result->secret_map["endpoint"].ToString(), result->secret_map["oauth2_scope"].ToString()); //! Set redact keys result->redact_keys = {"token", "client_id", "client_secret"}; @@ -64,13 +65,14 @@ static bool SanityCheckGlueWarehouse(string warehouse) { auto bucket_sep = warehouse.find_first_of('/'); bool bucket_sep_correct = bucket_sep == 28; if (!account_id_correct) { - throw IOException("Invalid Glue Catalog Format: '" + warehouse + "'. Expect 12 digits for account_id."); + throw InvalidConfigurationException("Invalid Glue Catalog Format: '" + warehouse + + "'. Expect 12 digits for account_id."); } if (bucket_sep_correct) { return true; } - throw IOException("Invalid Glue Catalog Format: '" + warehouse + - "'. Expected ':s3tablescatalog/"); + throw InvalidConfigurationException("Invalid Glue Catalog Format: '" + warehouse + + "'. Expected ':s3tablescatalog/"); } static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_info, ClientContext &context, @@ -83,6 +85,9 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in string service; string endpoint_type; string endpoint; + string oauth2_server_uri; + + auto &oauth2_scope = credentials.oauth2_scope; // check if we have a secret provided string secret_name; @@ -97,11 +102,28 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in } else if (lower_name == "endpoint") { endpoint = StringUtil::Lower(entry.second.ToString()); StringUtil::RTrim(endpoint, "/"); + } else if (lower_name == "oauth2_scope") { + oauth2_scope = StringUtil::Lower(entry.second.ToString()); + } else if (lower_name == "oauth2_server_uri") { + oauth2_server_uri = StringUtil::Lower(entry.second.ToString()); } else { throw BinderException("Unrecognized option for PC attach: %s", entry.first); } } auto warehouse = info.path; + + if (oauth2_scope.empty()) { + //! Default to the Polaris scope: 'PRINCIPAL_ROLE:ALL' + oauth2_scope = "PRINCIPAL_ROLE:ALL"; + } + + if (oauth2_server_uri.empty()) { + //! 
If no oauth2_server_uri is provided, default to the (deprecated) REST API endpoint for it + DUCKDB_LOG_WARN( + context, "iceberg", + "'oauth2_server_uri' is not set, defaulting to deprecated '{endpoint}/v1/oauth/tokens' oauth2_server_uri"); + oauth2_server_uri = StringUtil::Format("%s/v1/oauth/tokens", endpoint); + } auto catalog_type = ICEBERG_CATALOG_TYPE::INVALID; if (endpoint_type == "glue" || endpoint_type == "s3_tables") { @@ -120,8 +142,8 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in auto region = kv_secret.TryGetValue("region"); if (region.IsNull()) { - throw IOException("Assumed catalog secret " + secret_entry->secret->GetName() + " for catalog " + name + - " does not have a region"); + throw InvalidConfigurationException("Assumed catalog secret " + secret_entry->secret->GetName() + + " for catalog " + name + " does not have a region"); } switch (catalog_type) { case ICEBERG_CATALOG_TYPE::AWS_S3TABLES: { @@ -137,7 +159,7 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in SanityCheckGlueWarehouse(warehouse); break; default: - throw IOException("Unsupported AWS catalog type"); + throw NotImplementedException("Unsupported AWS catalog type"); } auto catalog_host = service + "." + region.ToString() + ".amazonaws.com"; @@ -149,10 +171,11 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in // Check no endpoint type has been passed. if (!endpoint_type.empty()) { - throw IOException("Unrecognized endpoint point: %s. Expected either S3_TABLES or GLUE", endpoint_type); + throw InvalidConfigurationException("Unrecognized endpoint point: %s. Expected either S3_TABLES or GLUE", + endpoint_type); } if (endpoint_type.empty() && endpoint.empty()) { - throw IOException("No endpoint type or endpoint provided"); + throw InvalidConfigurationException("No endpoint type or endpoint provided"); } catalog_type = ICEBERG_CATALOG_TYPE::OTHER; @@ -162,16 +185,18 @@ static unique_ptr IcebergCatalogAttach(StorageExtensionInfo *storage_in // if no secret is referenced, this throw auto secret_entry = IRCatalog::GetSecret(context, secret_name); if (!secret_entry) { - throw IOException("No secret found to use with catalog " + name); + throw InvalidConfigurationException("No secret found to use with catalog " + name); } // secret found - read data const auto &kv_secret = dynamic_cast(*secret_entry->secret); Value key_val = kv_secret.TryGetValue("key_id"); Value secret_val = kv_secret.TryGetValue("secret"); CreateSecretInput create_secret_input; + create_secret_input.options["oauth2_server_uri"] = oauth2_server_uri; create_secret_input.options["key_id"] = key_val; create_secret_input.options["secret"] = secret_val; create_secret_input.options["endpoint"] = endpoint; + create_secret_input.options["oauth2_scope"] = oauth2_scope; auto new_secret = CreateCatalogSecretFunction(context, create_secret_input); auto &kv_secret_new = dynamic_cast(*new_secret); Value token = kv_secret_new.TryGetValue("token"); diff --git a/src/iceberg_functions/iceberg_multi_file_reader.cpp b/src/iceberg_functions/iceberg_multi_file_reader.cpp index 3f6a0712..605c794b 100644 --- a/src/iceberg_functions/iceberg_multi_file_reader.cpp +++ b/src/iceberg_functions/iceberg_multi_file_reader.cpp @@ -313,7 +313,7 @@ void IcebergMultiFileReader::CreateColumnMapping(const string &file_name, // Lookup the required column in the local map auto entry = name_map.find("file_row_number"); if (entry == name_map.end()) { - throw IOException("Failed to find the file_row_number column"); 
+ throw InvalidConfigurationException("Failed to find the file_row_number column"); } // Register the column to be scanned from this file diff --git a/src/include/catalog_api.hpp b/src/include/catalog_api.hpp index f4970208..0e8001ff 100644 --- a/src/include/catalog_api.hpp +++ b/src/include/catalog_api.hpp @@ -3,6 +3,7 @@ #include "duckdb/common/types.hpp" #include "duckdb/parser/parsed_data/create_table_info.hpp" +#include "duckdb/parser/parsed_data/create_secret_info.hpp" //#include "storage/irc_catalog.hpp" namespace duckdb { @@ -37,10 +38,8 @@ struct IRCAPISchema { }; struct IRCAPITableCredentials { - string key_id; - string secret; - string session_token; - string region; + unique_ptr config; + vector storage_credentials; }; class IRCAPI { @@ -51,15 +50,14 @@ class IRCAPI { static void InitializeCurl(); static IRCAPITableCredentials GetTableCredentials(ClientContext &context, IRCatalog &catalog, const string &schema, - const string &table, IRCCredentials credentials); + const string &table, const string &secret_base_name); static vector GetCatalogs(ClientContext &context, IRCatalog &catalog, IRCCredentials credentials); static vector GetTables(ClientContext &context, IRCatalog &catalog, const string &schema); static IRCAPITable GetTable(ClientContext &context, IRCatalog &catalog, const string &schema, const string &table_name, optional_ptr credentials = nullptr); static vector GetSchemas(ClientContext &context, IRCatalog &catalog, IRCCredentials credentials); - static vector GetTablesInSchema(ClientContext &context, IRCatalog &catalog, const string &schema, - IRCCredentials credentials); - static string GetToken(ClientContext &context, string id, string secret, string endpoint); + static string GetToken(ClientContext &context, const string &uri, const string &id, const string &secret, + const string &endpoint, const string &scope); static IRCAPISchema CreateSchema(ClientContext &context, IRCatalog &catalog, const string &internal, const string &schema, IRCCredentials credentials); static void DropSchema(ClientContext &context, const string &internal, const string &schema, diff --git a/src/include/iceberg_types.hpp b/src/include/iceberg_types.hpp index 24d9e956..1be81408 100644 --- a/src/include/iceberg_types.hpp +++ b/src/include/iceberg_types.hpp @@ -27,7 +27,7 @@ static string IcebergManifestContentTypeToString(IcebergManifestContentType type case IcebergManifestContentType::DELETE: return "DELETE"; default: - throw IOException("Invalid Manifest Content Type"); + throw InvalidConfigurationException("Invalid Manifest Content Type"); } } @@ -42,7 +42,7 @@ static string IcebergManifestEntryStatusTypeToString(IcebergManifestEntryStatusT case IcebergManifestEntryStatusType::DELETED: return "DELETED"; default: - throw IOException("Invalid matifest entry type"); + throw InvalidConfigurationException("Invalid manifest entry type"); } } @@ -57,7 +57,7 @@ static string IcebergManifestEntryContentTypeToString(IcebergManifestEntryConten case IcebergManifestEntryContentType::EQUALITY_DELETES: return "EQUALITY_DELETES"; default: - throw IOException("Invalid Manifest Entry Content Type"); + throw InvalidConfigurationException("Invalid Manifest Entry Content Type"); } } diff --git a/src/include/storage/irc_catalog.hpp b/src/include/storage/irc_catalog.hpp index 677d9fbb..c7314d4f 100644 --- a/src/include/storage/irc_catalog.hpp +++ b/src/include/storage/irc_catalog.hpp @@ -15,10 +15,16 @@ class IRCSchemaEntry; struct IRCCredentials { string client_id; string client_secret; - // required to query 
s3 tables + //! required to query s3 tables string aws_region; - // Catalog generates the token using client id & secret + //! Catalog generates the token using client id & secret string token; + //! The scope of the OAuth token to request through the client_credentials flow + string oauth2_scope; + //! OAuth endpoint + string oauth2_endpoint; + //! The warehouse where the catalog lives + string warehouse; }; class ICRClearCacheFunction : public TableFunction { diff --git a/src/include/url_utils.hpp b/src/include/url_utils.hpp index 8c227c95..8fb6d04f 100644 --- a/src/include/url_utils.hpp +++ b/src/include/url_utils.hpp @@ -8,48 +8,44 @@ #pragma once -#include -#include -#include +#include "duckdb/common/string.hpp" +#include "duckdb/common/vector.hpp" +#include "duckdb/common/unordered_map.hpp" namespace duckdb { class IRCEndpointBuilder { public: - void AddPathComponent(std::string component); + void AddPathComponent(const string &component); + void AddQueryParameter(const string &key, const string &value); - void SetPrefix(std::string prefix_); - std::string GetPrefix() const; + void SetPrefix(const string &prefix_); + string GetPrefix() const; - void SetHost(std::string host_); - std::string GetHost() const; + void SetHost(const string &host_); + string GetHost() const; - void SetWarehouse(std::string warehouse_); - std::string GetWarehouse() const; + void SetVersion(const string &version_); + string GetVersion() const; - void SetVersion(std::string version_); - std::string GetVersion() const; + void SetParam(const string &key, const string &value); + string GetParam(const string &key) const; + const unordered_map GetParams(); - void SetParam(std::string key, std::string value); - std::string GetParam(std::string key) const; - const std::unordered_map GetParams(); - - std::string GetURL() const; + string GetURL() const; //! path components when querying. Like namespaces/tables etc. - std::vector path_components; + vector path_components; private: //! host of the endpoint, like `glue` or `polaris` - std::string host; + string host; //! version - std::string version; + string version; //! optional prefix - std::string prefix; - //! 
warehouse - std::string warehouse; + string prefix; - std::unordered_map params; + unordered_map params; }; } // namespace duckdb diff --git a/src/storage/irc_catalog.cpp b/src/storage/irc_catalog.cpp index b831789b..b95414da 100644 --- a/src/storage/irc_catalog.cpp +++ b/src/storage/irc_catalog.cpp @@ -137,8 +137,6 @@ DatabaseSize IRCatalog::GetDatabaseSize(ClientContext &context) { IRCEndpointBuilder IRCatalog::GetBaseUrl() const { auto base_url = IRCEndpointBuilder(); - base_url.SetPrefix(prefix); - base_url.SetWarehouse(warehouse); base_url.SetVersion(version); base_url.SetHost(host); switch (catalog_type) { @@ -168,14 +166,14 @@ unique_ptr IRCatalog::GetSecret(ClientContext &context, const strin if (!secret_entry) { auto secret_match = context.db->GetSecretManager().LookupSecret(transaction, "s3://", "s3"); if (!secret_match.HasMatch()) { - throw IOException("Failed to find a secret and no explicit secret was passed!"); + throw InvalidInputException("Failed to find a secret and no explicit secret was passed!"); } secret_entry = std::move(secret_match.secret_entry); } if (secret_entry) { return secret_entry; } - throw IOException("Could not find valid Iceberg secret"); + throw InvalidInputException("Could not find valid Iceberg secret"); } unique_ptr IRCatalog::PlanInsert(ClientContext &context, LogicalInsert &op, diff --git a/src/storage/irc_table_entry.cpp b/src/storage/irc_table_entry.cpp index b48616d0..d1439044 100644 --- a/src/storage/irc_table_entry.cpp +++ b/src/storage/irc_table_entry.cpp @@ -51,31 +51,17 @@ TableFunction ICTableEntry::GetScanFunction(ClientContext &context, unique_ptrschema_name, table_data->name, - ic_catalog.credentials); + auto secret_base_name = + StringUtil::Format("__internal_ic_%s__%s__%s", table_data->table_id, table_data->schema_name, table_data->name); + auto table_credentials = + IRCAPI::GetTableCredentials(context, ic_catalog, table_data->schema_name, table_data->name, secret_base_name); CreateSecretInfo info(OnCreateConflict::REPLACE_ON_CONFLICT, SecretPersistType::TEMPORARY); // First check if table credentials are set (possible the IC catalog does not return credentials) - if (!table_credentials.key_id.empty()) { - // Inject secret into secret manager scoped to this path - info.name = "__internal_ic_" + table_data->table_id; - info.type = "s3"; - info.provider = "config"; - info.storage_type = "memory"; - info.options = { - {"key_id", table_credentials.key_id}, - {"secret", table_credentials.secret}, - {"session_token", table_credentials.session_token}, - {"region", table_credentials.region}, - }; - - if (StringUtil::StartsWith(ic_catalog.host, "glue")) { - auto secret_entry = IRCatalog::GetSecret(context, ic_catalog.secret_name); - auto kv_secret = dynamic_cast(*secret_entry->secret); - auto region = kv_secret.TryGetValue("region").ToString(); - auto endpoint = "s3." + region + ".amazonaws.com"; - info.options["endpoint"] = endpoint; - } + if (table_credentials.config) { + auto &info = *table_credentials.config; + D_ASSERT(info.scope.empty()); + //! 
Limit the scope to the metadata location std::string lc_storage_location; lc_storage_location.resize(table_data->storage_location.size()); std::transform(table_data->storage_location.begin(), table_data->storage_location.end(), @@ -84,42 +70,37 @@ TableFunction ICTableEntry::GetScanFunction(ClientContext &context, unique_ptr(*secret_entry->secret); - info.name = "__internal_ic_" + table_data->table_id + "__" + table_data->schema_name + "__" + table_data->name; - info.type = "s3"; - info.provider = "config"; - info.storage_type = "memory"; - - auto substrings = StringUtil::Split(ic_catalog.warehouse, ":"); - D_ASSERT(substrings.size() == 6); - auto region = substrings[3]; - auto endpoint = "s3." + region + ".amazonaws.com"; - info.options = {{"key_id", kv_secret.TryGetValue("key_id").ToString()}, - {"secret", kv_secret.TryGetValue("secret").ToString()}, - {"session_token", kv_secret.TryGetValue("session_token").IsNull() - ? "" - : kv_secret.TryGetValue("session_token").ToString()}, - {"region", region}, - {"endpoint", endpoint}}; - std::string lc_storage_location; - lc_storage_location.resize(table_data->storage_location.size()); - std::transform(table_data->storage_location.begin(), table_data->storage_location.end(), - lc_storage_location.begin(), ::tolower); - size_t metadata_pos = lc_storage_location.find("metadata"); - if (metadata_pos != std::string::npos) { - info.scope = {lc_storage_location.substr(0, metadata_pos)}; - } else { - throw std::runtime_error("Substring not found"); + if (StringUtil::StartsWith(ic_catalog.host, "glue")) { + //! Override the endpoint if 'glue' is the host of the catalog + auto secret_entry = IRCatalog::GetSecret(context, ic_catalog.secret_name); + auto kv_secret = dynamic_cast(*secret_entry->secret); + auto region = kv_secret.TryGetValue("region").ToString(); + auto endpoint = "s3." + region + ".amazonaws.com"; + info.options["endpoint"] = endpoint; + } else if (StringUtil::StartsWith(ic_catalog.host, "s3tables")) { + //! Override all the options if 's3tables' is the host of the catalog + auto secret_entry = IRCatalog::GetSecret(context, ic_catalog.secret_name); + auto kv_secret = dynamic_cast(*secret_entry->secret); + auto substrings = StringUtil::Split(ic_catalog.warehouse, ":"); + D_ASSERT(substrings.size() == 6); + auto region = substrings[3]; + auto endpoint = "s3." + region + ".amazonaws.com"; + info.options = {{"key_id", kv_secret.TryGetValue("key_id").ToString()}, + {"secret", kv_secret.TryGetValue("secret").ToString()}, + {"session_token", kv_secret.TryGetValue("session_token").IsNull() + ? 
"" + : kv_secret.TryGetValue("session_token").ToString()}, + {"region", region}, + {"endpoint", endpoint}}; } - auto my_secret = secret_manager.CreateSecret(context, info); + (void)secret_manager.CreateSecret(context, info); + } + + for (auto &info : table_credentials.storage_credentials) { + (void)secret_manager.CreateSecret(context, info); } named_parameter_map_t param_map; diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index 38573b56..71bac284 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -35,7 +35,7 @@ unique_ptr ICTableSet::_CreateCatalogEntry(ClientContext &context, auto table_entry = make_uniq(catalog, schema, info); table_entry->table_data = make_uniq(table); - return table_entry; + return std::move(table_entry); } void ICTableSet::FillEntry(ClientContext &context, unique_ptr &entry) { diff --git a/test/sql/cloud/glue/test_glue.test b/test/sql/cloud/glue/test_glue.test index 761ab460..7f4f339e 100644 --- a/test/sql/cloud/glue/test_glue.test +++ b/test/sql/cloud/glue/test_glue.test @@ -65,7 +65,7 @@ attach '84014028:s3tablescatalog/pyiceberg-blog-bucket' as bad_account_id ( ENDPOINT_TYPE 'GLUE' ); ---- -IO Error +Invalid Configuration Error statement error attach '840140254803:incorrect/pyiceberg-blog-bucket' as no_s3tables ( @@ -73,6 +73,6 @@ attach '840140254803:incorrect/pyiceberg-blog-bucket' as no_s3tables ( ENDPOINT_TYPE 'GLUE' ); ---- -IO Error +Invalid Configuration Error diff --git a/test/sql/cloud/test_bad_connections.test b/test/sql/cloud/test_bad_connections.test index a22db99a..282245bb 100644 --- a/test/sql/cloud/test_bad_connections.test +++ b/test/sql/cloud/test_bad_connections.test @@ -19,7 +19,7 @@ attach 'arn:aws:s3tables:us-east-2:840140254803:bucket/iceberg-testing' as s3tab SECRET 's3table_secret' ); ---- -IO Error +Invalid Input Error statement error attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as glue_catalog ( @@ -28,7 +28,7 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as glue_catalog ( SECRET 'glue_secret' ); ---- -IO Error +Invalid Input Error statement error @@ -37,7 +37,7 @@ attach 'arn:aws:s3tables:us-east-2:840140254803:bucket/iceberg-testing' as s3tab ENDPOINT_TYPE 'S3_TABLES_XX' ); ---- -IO Error +Invalid Configuration Error statement error @@ -46,7 +46,7 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as glue_catalog ( ENDPOINT_TYPE 'GLUE_XXX' ); ---- -IO Error +Invalid Configuration Error statement ok CREATE SECRET ( @@ -60,6 +60,6 @@ attach 'quickstart_catalog' as my_datalake ( ENDPOINT 'http://does_not_exist.com/api/catalog' ) ---- -IO Error +HTTP Error diff --git a/test/sql/local/iceberg_on_tpch.test b/test/sql/local/iceberg_on_tpch.test index db725256..8b9b8a71 100644 --- a/test/sql/local/iceberg_on_tpch.test +++ b/test/sql/local/iceberg_on_tpch.test @@ -22,7 +22,7 @@ CREATE SECRET ( ENDPOINT '127.0.0.1:9000', URL_STYLE 'path', USE_SSL 0 - ); +); statement ok ATTACH '' AS my_datalake ( diff --git a/test/sql/local/iceberg_scans/iceberg_metadata.test b/test/sql/local/iceberg_scans/iceberg_metadata.test index 6708a29e..7628f538 100644 --- a/test/sql/local/iceberg_scans/iceberg_metadata.test +++ b/test/sql/local/iceberg_scans/iceberg_metadata.test @@ -40,12 +40,12 @@ lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DE statement error SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); ---- -IO Error: Iceberg metadata file not found for table version '2' using 
'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' +Invalid Configuration Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' statement error SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="blarg", version_name_format='blat%s%s'); ---- -IO Error: Iceberg metadata file not found for table version '2' using 'blarg' compression and format(s): 'blat%s%s' +Invalid Configuration Error: Iceberg metadata file not found for table version '2' using 'blarg' compression and format(s): 'blat%s%s' query IIIIIIII SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); @@ -55,7 +55,7 @@ lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA statement error SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_nonexistent'); ---- -IO Error: Failed to read iceberg table. No version was provided and no version-hint could be found, +Invalid Configuration Error: Failed to read iceberg table. No version was provided and no version-hint could be found, statement error SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); diff --git a/test/sql/local/iceberg_scans/iceberg_scan.test b/test/sql/local/iceberg_scans/iceberg_scan.test index 968281ad..cb1cbcad 100644 --- a/test/sql/local/iceberg_scans/iceberg_scan.test +++ b/test/sql/local/iceberg_scans/iceberg_scan.test @@ -60,12 +60,12 @@ SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', sn statement error FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', snapshot_from_timestamp='2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- -IO Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 +Invalid Configuration Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 statement error SELECT * FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); ---- -IO Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' +Invalid Configuration Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' query I SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); @@ -109,7 +109,7 @@ SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_h statement error FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', snapshot_from_timestamp='2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- -IO Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 +Invalid Configuration Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 query I SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); diff --git a/test/sql/local/iceberg_scans/iceberg_snapshots.test b/test/sql/local/iceberg_scans/iceberg_snapshots.test index 189baac8..b3f2cd13 100644 --- a/test/sql/local/iceberg_scans/iceberg_snapshots.test +++ b/test/sql/local/iceberg_scans/iceberg_snapshots.test 
@@ -30,7 +30,7 @@ SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', vers statement error SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.gz'); ---- -IO Error: Iceberg metadata file not found for table version '1' using 'none' compression and format(s): 'v%s%s.metadata.gz' +Invalid Configuration Error: Iceberg metadata file not found for table version '1' using 'none' compression and format(s): 'v%s%s.metadata.gz' query IIII SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.json'); @@ -45,12 +45,12 @@ SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', vers statement error SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_nonexistent'); ---- -IO Error: Failed to read iceberg table. No version was provided and no version-hint could be found, +Invalid Configuration Error: Failed to read iceberg table. No version was provided and no version-hint could be found, statement error SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz'); ---- -IO Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' +Invalid Configuration Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' query IIII SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip"); diff --git a/test/sql/local/irc/iceberg_catalog_read.test b/test/sql/local/irc/iceberg_catalog_read.test index 9734f854..949116ba 100644 --- a/test/sql/local/irc/iceberg_catalog_read.test +++ b/test/sql/local/irc/iceberg_catalog_read.test @@ -72,10 +72,11 @@ select * from my_datalake.default.table_more_deletes order by all; 2023-03-11 11 k 2023-03-12 12 l +# TODO: after https://github.com/duckdb/duckdb-iceberg/pull/142 is merged, we can combine the +# following two tests into one query I -SELECT message FROM duckdb_logs where type='iceberg.Catalog.Curl.HTTPRequest' order by timestamp +SELECT message FROM duckdb_logs where type='iceberg.Catalog.Curl.HTTPRequest' and message like '%GET%' order by timestamp ---- -POST http://127.0.0.1:8181/v1/oauth/tokens (curl code 'No error') GET http://127.0.0.1:8181/v1/config?warehouse= (curl code 'No error') GET http://127.0.0.1:8181/v1/namespaces (curl code 'No error') GET http://127.0.0.1:8181/v1/namespaces/default/tables (curl code 'No error') @@ -84,6 +85,11 @@ GET http://127.0.0.1:8181/v1/namespaces/default/tables/table_unpartitioned (curl GET http://127.0.0.1:8181/v1/namespaces/default/tables/table_more_deletes (curl code 'No error') GET http://127.0.0.1:8181/v1/namespaces/default/tables/table_more_deletes (curl code 'No error') +query I +SELECT message FROM duckdb_logs where type='iceberg.Catalog.Curl.HTTPRequest' and message like '%POST%' order by timestamp +---- +POST http://127.0.0.1:8181/v1/oauth/tokens (curl code 'No error') + query I select count(*) from my_datalake.default.pyspark_iceberg_table_v2; ---- diff --git a/test/sql/local/irc/test_mutliple_keys.test b/test/sql/local/irc/test_mutliple_keys.test index 8db283ad..6dd76df7 100644 --- a/test/sql/local/irc/test_mutliple_keys.test +++ b/test/sql/local/irc/test_mutliple_keys.test @@ -40,7 +40,7 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as glue_lake ( ENDPOINT_TYPE 
'GLUE' ); ---- -IO Error +Invalid Configuration Error statement ok CREATE or replace SECRET amazing_secret ( @@ -60,7 +60,7 @@ attach '840140254803:s3tablescatalog/pyiceberg-blog-bucket' as glue_lake ( ENDPOINT_TYPE 'GLUE' ); ---- -IO Error +HTTP Error # TODO: fix me. Only fails in CI mode skip diff --git a/test/sql/local/irc/test_polaris.test b/test/sql/local/irc/test_polaris.test index fffeee0f..68c39f12 100644 --- a/test/sql/local/irc/test_polaris.test +++ b/test/sql/local/irc/test_polaris.test @@ -2,6 +2,12 @@ # description: test integration with iceberg catalog read # group: [irc] +require-env POLARIS_CLIENT_ID + +require-env POLARIS_CLIENT_SECRET + +require-env POLARIS_SERVER_AVAILABLE + require avro require parquet @@ -12,17 +18,15 @@ require iceberg require aws -mode skip - statement ok create secret polaris_secret ( - type s3, - KEY_ID '', - SECRET '' + TYPE S3, + KEY_ID '${POLARIS_CLIENT_ID}', + SECRET '${POLARIS_CLIENT_SECRET}' ); statement ok -attach 'polaris_demo' as my_datalake ( +attach 'quickstart_catalog' as my_datalake ( type ICEBERG, ENDPOINT 'http://0.0.0.0:8181/api/catalog' ); @@ -30,9 +34,16 @@ attach 'polaris_demo' as my_datalake ( statement ok show all tables; +# tables may be added, we just want to make sure we are accurately getting the information +query I +select count(*) > 0 from (show all tables); +---- +true + query II -select * from namespace.catalog.table; +select * from my_datalake.COLLADO_TEST.quickstart_table; ---- -1 -2 +1 some data +2 more data +3 yet more data diff --git a/test/sql/local/irc/test_polaris_tpch.test b/test/sql/local/irc/test_polaris_tpch.test new file mode 100644 index 00000000..19ea0028 --- /dev/null +++ b/test/sql/local/irc/test_polaris_tpch.test @@ -0,0 +1,56 @@ +# name: test/sql/local/irc/test_polaris_tpch.test +# description: test integration with iceberg catalog read +# group: [irc] + +require-env POLARIS_CLIENT_ID + +require-env POLARIS_CLIENT_SECRET + +require-env POLARIS_SERVER_AVAILABLE + +require avro + +require parquet + +require httpfs + +require iceberg + +require aws + +require tpch + +statement ok +create secret polaris_secret ( + TYPE S3, + KEY_ID '${POLARIS_CLIENT_ID}', + SECRET '${POLARIS_CLIENT_SECRET}' +); + +statement ok +attach 'quickstart_catalog' as my_datalake ( + type ICEBERG, + ENDPOINT 'http://0.0.0.0:8181/api/catalog' +); + +statement ok +use my_datalake.default; + +# FIXME: run smaller scale with full dataset +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop \ No newline at end of file diff --git a/vcpkg.json b/vcpkg.json index 10371314..d5d7e32d 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,6 @@ { "dependencies": [ + "vcpkg-cmake", "avro-c", "curl", "openssl", @@ -24,4 +25,4 @@ "version": "3.0.8" } ] -} \ No newline at end of file +} diff --git a/vcpkg_ports/avro.patch b/vcpkg_ports/avro-c/avro.patch similarity index 100% rename from vcpkg_ports/avro.patch rename to vcpkg_ports/avro-c/avro.patch diff --git a/vcpkg_ports/duckdb.patch b/vcpkg_ports/avro-c/duckdb.patch similarity index 100% rename from vcpkg_ports/duckdb.patch rename to vcpkg_ports/avro-c/duckdb.patch diff --git a/vcpkg_ports/portfile.cmake b/vcpkg_ports/avro-c/portfile.cmake similarity index 100% rename from vcpkg_ports/portfile.cmake rename to vcpkg_ports/avro-c/portfile.cmake diff --git a/vcpkg_ports/vcpkg.json b/vcpkg_ports/avro-c/vcpkg.json similarity index 
100% rename from vcpkg_ports/vcpkg.json rename to vcpkg_ports/avro-c/vcpkg.json diff --git a/vcpkg_ports/vcpkg-cmake/portfile.cmake b/vcpkg_ports/vcpkg-cmake/portfile.cmake new file mode 100644 index 00000000..0b7dd502 --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/portfile.cmake @@ -0,0 +1,14 @@ +if(VCPKG_CROSSCOMPILING) + # make FATAL_ERROR in CI when issue #16773 fixed + message(WARNING "vcpkg-cmake is a host-only port; please mark it as a host port in your dependencies.") +endif() + +file(INSTALL + "${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_configure.cmake" + "${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_build.cmake" + "${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_install.cmake" + "${CMAKE_CURRENT_LIST_DIR}/vcpkg-port-config.cmake" + DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") + +file(INSTALL "${VCPKG_ROOT_DIR}/LICENSE.txt" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) +set(VCPKG_POLICY_CMAKE_HELPER_PORT enabled) diff --git a/vcpkg_ports/vcpkg-cmake/vcpkg-port-config.cmake b/vcpkg_ports/vcpkg-cmake/vcpkg-port-config.cmake new file mode 100644 index 00000000..f2a973d4 --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/vcpkg-port-config.cmake @@ -0,0 +1,3 @@ +include("${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_configure.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_build.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/vcpkg_cmake_install.cmake") diff --git a/vcpkg_ports/vcpkg-cmake/vcpkg.json b/vcpkg_ports/vcpkg-cmake/vcpkg.json new file mode 100644 index 00000000..fa484eaf --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/vcpkg.json @@ -0,0 +1,6 @@ +{ + "name": "vcpkg-cmake", + "version-date": "2024-04-23", + "documentation": "https://learn.microsoft.com/vcpkg/maintainers/functions/vcpkg_cmake_configure", + "license": "MIT" +} diff --git a/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_build.cmake b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_build.cmake new file mode 100644 index 00000000..47933b3f --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_build.cmake @@ -0,0 +1,91 @@ +include_guard(GLOBAL) + +function(vcpkg_cmake_build) + cmake_parse_arguments(PARSE_ARGV 0 "arg" "DISABLE_PARALLEL;ADD_BIN_TO_PATH" "TARGET;LOGFILE_BASE" "") + + if(DEFINED arg_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "vcpkg_cmake_build was passed extra arguments: ${arg_UNPARSED_ARGUMENTS}") + endif() + if(NOT DEFINED arg_LOGFILE_BASE) + set(arg_LOGFILE_BASE "build") + endif() + vcpkg_list(SET build_param) + vcpkg_list(SET parallel_param) + vcpkg_list(SET no_parallel_param) + + if("${Z_VCPKG_CMAKE_GENERATOR}" STREQUAL "Ninja") + vcpkg_list(SET build_param "-v") # verbose output + vcpkg_list(SET parallel_param "-j${VCPKG_CONCURRENCY}") + vcpkg_list(SET no_parallel_param "-j1") + elseif("${Z_VCPKG_CMAKE_GENERATOR}" MATCHES "^Visual Studio") + vcpkg_list(SET build_param + "/p:VCPkgLocalAppDataDisabled=true" + "/p:UseIntelMKL=No" + ) + vcpkg_list(SET parallel_param "/m") + elseif("${Z_VCPKG_CMAKE_GENERATOR}" STREQUAL "NMake Makefiles") + # No options are currently added for nmake builds + elseif(Z_VCPKG_CMAKE_GENERATOR STREQUAL "Unix Makefiles") + vcpkg_list(SET build_param "VERBOSE=1") + vcpkg_list(SET parallel_param "-j${VCPKG_CONCURRENCY}") + vcpkg_list(SET no_parallel_param "") + elseif(Z_VCPKG_CMAKE_GENERATOR STREQUAL "Xcode") + vcpkg_list(SET parallel_param -jobs "${VCPKG_CONCURRENCY}") + vcpkg_list(SET no_parallel_param -jobs 1) + else() + message(WARNING "Unrecognized GENERATOR setting from vcpkg_cmake_configure().") + endif() + + vcpkg_list(SET target_param) + if(arg_TARGET) + vcpkg_list(SET target_param "--target" 
"${arg_TARGET}") + endif() + + foreach(build_type IN ITEMS debug release) + if(NOT DEFINED VCPKG_BUILD_TYPE OR "${VCPKG_BUILD_TYPE}" STREQUAL "${build_type}") + if("${build_type}" STREQUAL "debug") + set(short_build_type "dbg") + set(config "Debug") + else() + set(short_build_type "rel") + set(config "Release") + endif() + + message(STATUS "Building ${TARGET_TRIPLET}-${short_build_type}") + + if(arg_ADD_BIN_TO_PATH) + vcpkg_backup_env_variables(VARS PATH) + if("${build_type}" STREQUAL "debug") + vcpkg_add_to_path(PREPEND "${CURRENT_INSTALLED_DIR}/debug/bin") + else() + vcpkg_add_to_path(PREPEND "${CURRENT_INSTALLED_DIR}/bin") + endif() + endif() + + if(arg_DISABLE_PARALLEL) + vcpkg_execute_build_process( + COMMAND + "${CMAKE_COMMAND}" --build . --config "${config}" ${target_param} + -- ${build_param} ${no_parallel_param} + WORKING_DIRECTORY "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-${short_build_type}" + LOGNAME "${arg_LOGFILE_BASE}-${TARGET_TRIPLET}-${short_build_type}" + ) + else() + vcpkg_execute_build_process( + COMMAND + "${CMAKE_COMMAND}" --build . --config "${config}" ${target_param} + -- ${build_param} ${parallel_param} + NO_PARALLEL_COMMAND + "${CMAKE_COMMAND}" --build . --config "${config}" ${target_param} + -- ${build_param} ${no_parallel_param} + WORKING_DIRECTORY "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-${short_build_type}" + LOGNAME "${arg_LOGFILE_BASE}-${TARGET_TRIPLET}-${short_build_type}" + ) + endif() + + if(arg_ADD_BIN_TO_PATH) + vcpkg_restore_env_variables(VARS PATH) + endif() + endif() + endforeach() +endfunction() diff --git a/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_configure.cmake b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_configure.cmake new file mode 100644 index 00000000..39ea39c2 --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_configure.cmake @@ -0,0 +1,355 @@ +include_guard(GLOBAL) + +macro(z_vcpkg_cmake_configure_both_set_or_unset var1 var2) + if(DEFINED ${var1} AND NOT DEFINED ${var2}) + message(FATAL_ERROR "If ${var1} is set, then ${var2} must be set.") + elseif(NOT DEFINED ${var1} AND DEFINED ${var2}) + message(FATAL_ERROR "If ${var2} is set, then ${var1} must be set.") + endif() +endmacro() + +function(vcpkg_cmake_configure) + cmake_parse_arguments(PARSE_ARGV 0 "arg" + "PREFER_NINJA;DISABLE_PARALLEL_CONFIGURE;WINDOWS_USE_MSBUILD;NO_CHARSET_FLAG;Z_CMAKE_GET_VARS_USAGE" + "SOURCE_PATH;GENERATOR;LOGFILE_BASE" + "OPTIONS;OPTIONS_DEBUG;OPTIONS_RELEASE;MAYBE_UNUSED_VARIABLES" + ) + + if(NOT arg_Z_CMAKE_GET_VARS_USAGE AND DEFINED CACHE{Z_VCPKG_CMAKE_GENERATOR}) + message(WARNING "${CMAKE_CURRENT_FUNCTION} already called; this function should only be called once.") + endif() + if(arg_PREFER_NINJA) + message(WARNING "PREFER_NINJA has been deprecated in ${CMAKE_CURRENT_FUNCTION}. Please remove it from the portfile!") + endif() + + if(DEFINED arg_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "${CMAKE_CURRENT_FUNCTION} was passed extra arguments: ${arg_UNPARSED_ARGUMENTS}") + endif() + + if(NOT DEFINED arg_SOURCE_PATH) + message(FATAL_ERROR "SOURCE_PATH must be set") + endif() + if(NOT DEFINED arg_LOGFILE_BASE) + set(arg_LOGFILE_BASE "config-${TARGET_TRIPLET}") + endif() + + set(invalid_maybe_unused_vars "${arg_MAYBE_UNUSED_VARIABLES}") + list(FILTER invalid_maybe_unused_vars INCLUDE REGEX "^-D") + if(NOT invalid_maybe_unused_vars STREQUAL "") + list(JOIN invalid_maybe_unused_vars " " bad_items) + message(${Z_VCPKG_BACKCOMPAT_MESSAGE_LEVEL} + "Option MAYBE_UNUSED_VARIABLES must be used with variables names. 
" + "The following items are invalid: ${bad_items}") + endif() + + set(manually_specified_variables "") + + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_POLICY_VERSION_MINIMUM=3.5") + + if(arg_Z_CMAKE_GET_VARS_USAGE) + set(configuring_message "Getting CMake variables for ${TARGET_TRIPLET}") + else() + set(configuring_message "Configuring ${TARGET_TRIPLET}") + + foreach(option IN LISTS arg_OPTIONS arg_OPTIONS_RELEASE arg_OPTIONS_DEBUG) + if("${option}" MATCHES "^-D([^:=]*)[:=]") + vcpkg_list(APPEND manually_specified_variables "${CMAKE_MATCH_1}") + endif() + endforeach() + vcpkg_list(REMOVE_DUPLICATES manually_specified_variables) + foreach(maybe_unused_var IN LISTS arg_MAYBE_UNUSED_VARIABLES) + vcpkg_list(REMOVE_ITEM manually_specified_variables "${maybe_unused_var}") + endforeach() + debug_message("manually specified variables: ${manually_specified_variables}") + endif() + + if(CMAKE_HOST_WIN32) + if(DEFINED ENV{PROCESSOR_ARCHITEW6432}) + set(host_architecture "$ENV{PROCESSOR_ARCHITEW6432}") + else() + set(host_architecture "$ENV{PROCESSOR_ARCHITECTURE}") + endif() + endif() + + set(ninja_host ON) # Ninja availability + if(host_architecture STREQUAL "x86" OR DEFINED ENV{VCPKG_FORCE_SYSTEM_BINARIES}) + # Prebuilt ninja binaries are only provided for x64 hosts + find_program(NINJA NAMES ninja ninja-build) + if(NOT NINJA) + set(ninja_host OFF) + set(arg_DISABLE_PARALLEL_CONFIGURE ON) + set(arg_WINDOWS_USE_MSBUILD ON) + endif() + endif() + + set(generator "") + set(architecture_options "") + if(arg_WINDOWS_USE_MSBUILD AND VCPKG_HOST_IS_WINDOWS AND VCPKG_TARGET_IS_WINDOWS AND NOT VCPKG_TARGET_IS_MINGW) + z_vcpkg_get_visual_studio_generator(OUT_GENERATOR generator OUT_ARCH arch) + vcpkg_list(APPEND architecture_options "-A${arch}") + if(DEFINED VCPKG_PLATFORM_TOOLSET) + vcpkg_list(APPEND arg_OPTIONS "-T${VCPKG_PLATFORM_TOOLSET}") + endif() + if(NOT generator) + message(FATAL_ERROR "Unable to determine appropriate Visual Studio generator for triplet ${TARGET_TRIPLET}: + ENV{VisualStudioVersion} : $ENV{VisualStudioVersion} + VCPKG_TARGET_ARCHITECTURE: ${VCPKG_TARGET_ARCHITECTURE}") + endif() + elseif(DEFINED arg_GENERATOR) + set(generator "${arg_GENERATOR}") + elseif(ninja_host) + set(generator "Ninja") + elseif(NOT VCPKG_HOST_IS_WINDOWS) + set(generator "Unix Makefiles") + endif() + + if(NOT generator) + if(NOT VCPKG_CMAKE_SYSTEM_NAME) + set(VCPKG_CMAKE_SYSTEM_NAME "Windows") + endif() + message(FATAL_ERROR "Unable to determine appropriate generator for: " + "${VCPKG_CMAKE_SYSTEM_NAME}-${VCPKG_TARGET_ARCHITECTURE}-${VCPKG_PLATFORM_TOOLSET}") + endif() + + set(parallel_log_args "") + set(log_args "") + + if(generator STREQUAL "Ninja") + vcpkg_find_acquire_program(NINJA) + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_MAKE_PROGRAM=${NINJA}") + # If we use Ninja, it must be on PATH for CMake's ExternalProject, + # cf. https://gitlab.kitware.com/cmake/cmake/-/issues/23355. 
+ get_filename_component(ninja_path "${NINJA}" DIRECTORY) + vcpkg_add_to_path("${ninja_path}") + set(parallel_log_args + "../build.ninja" ALIAS "rel-ninja.log" + "../../${TARGET_TRIPLET}-dbg/build.ninja" ALIAS "dbg-ninja.log" + ) + set(log_args "build.ninja") + endif() + + set(build_dir_release "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel") + set(build_dir_debug "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-dbg") + file(REMOVE_RECURSE + "${build_dir_release}" + "${build_dir_debug}") + file(MAKE_DIRECTORY "${build_dir_release}") + if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") + file(MAKE_DIRECTORY "${build_dir_debug}") + endif() + + if(DEFINED VCPKG_CMAKE_SYSTEM_NAME) + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_SYSTEM_NAME=${VCPKG_CMAKE_SYSTEM_NAME}") + if(VCPKG_TARGET_IS_UWP AND NOT DEFINED VCPKG_CMAKE_SYSTEM_VERSION) + set(VCPKG_CMAKE_SYSTEM_VERSION 10.0) + elseif(VCPKG_TARGET_IS_ANDROID AND NOT DEFINED VCPKG_CMAKE_SYSTEM_VERSION) + set(VCPKG_CMAKE_SYSTEM_VERSION 21) + endif() + endif() + + if(DEFINED VCPKG_CMAKE_SYSTEM_VERSION) + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_SYSTEM_VERSION=${VCPKG_CMAKE_SYSTEM_VERSION}") + endif() + + if(DEFINED VCPKG_XBOX_CONSOLE_TARGET) + vcpkg_list(APPEND arg_OPTIONS "-DXBOX_CONSOLE_TARGET=${VCPKG_XBOX_CONSOLE_TARGET}") + endif() + + if(VCPKG_LIBRARY_LINKAGE STREQUAL "dynamic") + vcpkg_list(APPEND arg_OPTIONS "-DBUILD_SHARED_LIBS=ON") + elseif(VCPKG_LIBRARY_LINKAGE STREQUAL "static") + vcpkg_list(APPEND arg_OPTIONS "-DBUILD_SHARED_LIBS=OFF") + else() + message(FATAL_ERROR + "Invalid setting for VCPKG_LIBRARY_LINKAGE: \"${VCPKG_LIBRARY_LINKAGE}\". " + "It must be \"static\" or \"dynamic\"") + endif() + + z_vcpkg_cmake_configure_both_set_or_unset(VCPKG_CXX_FLAGS_DEBUG VCPKG_C_FLAGS_DEBUG) + z_vcpkg_cmake_configure_both_set_or_unset(VCPKG_CXX_FLAGS_RELEASE VCPKG_C_FLAGS_RELEASE) + z_vcpkg_cmake_configure_both_set_or_unset(VCPKG_CXX_FLAGS VCPKG_C_FLAGS) + + set(VCPKG_SET_CHARSET_FLAG ON) + if(arg_NO_CHARSET_FLAG) + set(VCPKG_SET_CHARSET_FLAG OFF) + endif() + + if(NOT DEFINED VCPKG_CHAINLOAD_TOOLCHAIN_FILE) + z_vcpkg_select_default_vcpkg_chainload_toolchain() + endif() + + list(JOIN VCPKG_TARGET_ARCHITECTURE "\;" target_architecture_string) + vcpkg_list(APPEND arg_OPTIONS + "-DVCPKG_CHAINLOAD_TOOLCHAIN_FILE=${VCPKG_CHAINLOAD_TOOLCHAIN_FILE}" + "-DVCPKG_TARGET_TRIPLET=${TARGET_TRIPLET}" + "-DVCPKG_SET_CHARSET_FLAG=${VCPKG_SET_CHARSET_FLAG}" + "-DVCPKG_PLATFORM_TOOLSET=${VCPKG_PLATFORM_TOOLSET}" + "-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON" + "-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON" + "-DCMAKE_FIND_PACKAGE_NO_SYSTEM_PACKAGE_REGISTRY=ON" + "-DCMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP=TRUE" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVCPKG_APPLOCAL_DEPS=OFF" + "-DCMAKE_TOOLCHAIN_FILE=${SCRIPTS}/buildsystems/vcpkg.cmake" + "-DCMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION=ON" + "-DVCPKG_CXX_FLAGS=${VCPKG_CXX_FLAGS}" + "-DVCPKG_CXX_FLAGS_RELEASE=${VCPKG_CXX_FLAGS_RELEASE}" + "-DVCPKG_CXX_FLAGS_DEBUG=${VCPKG_CXX_FLAGS_DEBUG}" + "-DVCPKG_C_FLAGS=${VCPKG_C_FLAGS}" + "-DVCPKG_C_FLAGS_RELEASE=${VCPKG_C_FLAGS_RELEASE}" + "-DVCPKG_C_FLAGS_DEBUG=${VCPKG_C_FLAGS_DEBUG}" + "-DVCPKG_CRT_LINKAGE=${VCPKG_CRT_LINKAGE}" + "-DVCPKG_LINKER_FLAGS=${VCPKG_LINKER_FLAGS}" + "-DVCPKG_LINKER_FLAGS_RELEASE=${VCPKG_LINKER_FLAGS_RELEASE}" + "-DVCPKG_LINKER_FLAGS_DEBUG=${VCPKG_LINKER_FLAGS_DEBUG}" + "-DVCPKG_TARGET_ARCHITECTURE=${target_architecture_string}" + "-DCMAKE_INSTALL_LIBDIR:STRING=lib" + "-DCMAKE_INSTALL_BINDIR:STRING=bin" + "-D_VCPKG_ROOT_DIR=${VCPKG_ROOT_DIR}" + 
"-D_VCPKG_INSTALLED_DIR=${_VCPKG_INSTALLED_DIR}" + "-DVCPKG_MANIFEST_INSTALL=OFF" + ) + + # Sets configuration variables for macOS builds + foreach(config_var IN ITEMS INSTALL_NAME_DIR OSX_DEPLOYMENT_TARGET OSX_SYSROOT OSX_ARCHITECTURES) + if(DEFINED VCPKG_${config_var}) + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_${config_var}=${VCPKG_${config_var}}") + endif() + endforeach() + + vcpkg_list(PREPEND arg_OPTIONS "-DFETCHCONTENT_FULLY_DISCONNECTED=ON") + + # Allow overrides / additional configuration variables from triplets + if(DEFINED VCPKG_CMAKE_CONFIGURE_OPTIONS) + vcpkg_list(APPEND arg_OPTIONS ${VCPKG_CMAKE_CONFIGURE_OPTIONS}) + endif() + if(DEFINED VCPKG_CMAKE_CONFIGURE_OPTIONS_RELEASE) + vcpkg_list(APPEND arg_OPTIONS_RELEASE ${VCPKG_CMAKE_CONFIGURE_OPTIONS_RELEASE}) + endif() + if(DEFINED VCPKG_CMAKE_CONFIGURE_OPTIONS_DEBUG) + vcpkg_list(APPEND arg_OPTIONS_DEBUG ${VCPKG_CMAKE_CONFIGURE_OPTIONS_DEBUG}) + endif() + + vcpkg_list(SET rel_command + "${CMAKE_COMMAND}" "${arg_SOURCE_PATH}" + -G "${generator}" + ${architecture_options} + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX=${CURRENT_PACKAGES_DIR}" + ${arg_OPTIONS} ${arg_OPTIONS_RELEASE}) + vcpkg_list(SET dbg_command + "${CMAKE_COMMAND}" "${arg_SOURCE_PATH}" + -G "${generator}" + ${architecture_options} + "-DCMAKE_BUILD_TYPE=Debug" + "-DCMAKE_INSTALL_PREFIX=${CURRENT_PACKAGES_DIR}/debug" + ${arg_OPTIONS} ${arg_OPTIONS_DEBUG}) + + if(NOT arg_DISABLE_PARALLEL_CONFIGURE) + vcpkg_list(APPEND arg_OPTIONS "-DCMAKE_DISABLE_SOURCE_CHANGES=ON") + + vcpkg_find_acquire_program(NINJA) + + #parallelize the configure step + set(ninja_configure_contents + "rule CreateProcess\n command = \$process\n\n" + ) + + if(NOT DEFINED VCPKG_BUILD_TYPE OR "${VCPKG_BUILD_TYPE}" STREQUAL "release") + z_vcpkg_configure_cmake_build_cmakecache(ninja_configure_contents ".." 
"rel") + endif() + if(NOT DEFINED VCPKG_BUILD_TYPE OR "${VCPKG_BUILD_TYPE}" STREQUAL "debug") + z_vcpkg_configure_cmake_build_cmakecache(ninja_configure_contents "../../${TARGET_TRIPLET}-dbg" "dbg") + endif() + + file(MAKE_DIRECTORY "${build_dir_release}/vcpkg-parallel-configure") + file(WRITE + "${build_dir_release}/vcpkg-parallel-configure/build.ninja" + "${ninja_configure_contents}") + + message(STATUS "${configuring_message}") + vcpkg_execute_required_process( + COMMAND "${NINJA}" -v + WORKING_DIRECTORY "${build_dir_release}/vcpkg-parallel-configure" + LOGNAME "${arg_LOGFILE_BASE}" + SAVE_LOG_FILES + "../../${TARGET_TRIPLET}-dbg/CMakeCache.txt" ALIAS "dbg-CMakeCache.txt.log" + "../CMakeCache.txt" ALIAS "rel-CMakeCache.txt.log" + "../../${TARGET_TRIPLET}-dbg/CMakeFiles/CMakeConfigureLog.yaml" ALIAS "dbg-CMakeConfigureLog.yaml.log" + "../CMakeFiles/CMakeConfigureLog.yaml" ALIAS "rel-CMakeConfigureLog.yaml.log" + ${parallel_log_args} + ) + + vcpkg_list(APPEND config_logs + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-out.log" + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-err.log") + else() + if(NOT DEFINED VCPKG_BUILD_TYPE OR "${VCPKG_BUILD_TYPE}" STREQUAL "debug") + message(STATUS "${configuring_message}-dbg") + vcpkg_execute_required_process( + COMMAND ${dbg_command} + WORKING_DIRECTORY "${build_dir_debug}" + LOGNAME "${arg_LOGFILE_BASE}-dbg" + SAVE_LOG_FILES + "CMakeCache.txt" + "CMakeFiles/CMakeConfigureLog.yaml" + ${log_args} + ) + vcpkg_list(APPEND config_logs + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-dbg-out.log" + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-dbg-err.log") + endif() + + if(NOT DEFINED VCPKG_BUILD_TYPE OR "${VCPKG_BUILD_TYPE}" STREQUAL "release") + message(STATUS "${configuring_message}-rel") + vcpkg_execute_required_process( + COMMAND ${rel_command} + WORKING_DIRECTORY "${build_dir_release}" + LOGNAME "${arg_LOGFILE_BASE}-rel" + SAVE_LOG_FILES + "CMakeCache.txt" + "CMakeFiles/CMakeConfigureLog.yaml" + ${log_args} + ) + vcpkg_list(APPEND config_logs + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-rel-out.log" + "${CURRENT_BUILDTREES_DIR}/${arg_LOGFILE_BASE}-rel-err.log") + endif() + endif() + + set(all_unused_variables) + foreach(config_log IN LISTS config_logs) + if(NOT EXISTS "${config_log}") + continue() + endif() + file(READ "${config_log}" log_contents) + debug_message("Reading configure log ${config_log}...") + if(NOT log_contents MATCHES "Manually-specified variables were not used by the project:\n\n(( [^\n]*\n)*)") + continue() + endif() + string(STRIP "${CMAKE_MATCH_1}" unused_variables) # remove leading ` ` and trailing `\n` + string(REPLACE "\n " ";" unused_variables "${unused_variables}") + debug_message("unused variables: ${unused_variables}") + foreach(unused_variable IN LISTS unused_variables) + if(unused_variable IN_LIST manually_specified_variables) + debug_message("manually specified unused variable: ${unused_variable}") + vcpkg_list(APPEND all_unused_variables "${unused_variable}") + else() + debug_message("unused variable (not manually specified): ${unused_variable}") + endif() + endforeach() + endforeach() + + if(DEFINED all_unused_variables) + vcpkg_list(REMOVE_DUPLICATES all_unused_variables) + vcpkg_list(JOIN all_unused_variables "\n " all_unused_variables) + message(WARNING "The following variables are not used in CMakeLists.txt: + ${all_unused_variables} +Please recheck them and remove the unnecessary options from the `vcpkg_cmake_configure` call. 
+If these options should still be passed for whatever reason, please use the `MAYBE_UNUSED_VARIABLES` argument.") + endif() + + if(NOT arg_Z_CMAKE_GET_VARS_USAGE) + set(Z_VCPKG_CMAKE_GENERATOR "${generator}" CACHE INTERNAL "The generator which was used to configure CMake.") + endif() +endfunction() diff --git a/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_install.cmake b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_install.cmake new file mode 100644 index 00000000..2bd8b4ea --- /dev/null +++ b/vcpkg_ports/vcpkg-cmake/vcpkg_cmake_install.cmake @@ -0,0 +1,21 @@ +include_guard(GLOBAL) + +function(vcpkg_cmake_install) + cmake_parse_arguments(PARSE_ARGV 0 "arg" "DISABLE_PARALLEL;ADD_BIN_TO_PATH" "" "") + if(DEFINED arg_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "vcpkg_cmake_install was passed extra arguments: ${arg_UNPARSED_ARGUMENTS}") + endif() + + set(args) + foreach(arg IN ITEMS DISABLE_PARALLEL ADD_BIN_TO_PATH) + if(arg_${arg}) + list(APPEND args "${arg}") + endif() + endforeach() + + vcpkg_cmake_build( + ${args} + LOGFILE_BASE install + TARGET install + ) +endfunction()
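
Editor's note on the reshaped IRCEndpointBuilder: the url_utils.hpp hunk above trims the builder to host/version/prefix plus path components and query parameters, and irc_catalog.cpp's GetBaseUrl() now only seeds host and version. The sketch below is illustrative only; the helper name LoadTableEndpoint and the "snapshots" query parameter are assumptions for the example and are not taken from the extension source, and the exact URL composition the extension performs may differ.

// Illustrative sketch (inside namespace duckdb), assuming the declarations
// from src/include/url_utils.hpp and src/include/storage/irc_catalog.hpp
// shown in this diff. Helper name and query parameter are hypothetical.
static string LoadTableEndpoint(const IRCatalog &catalog, const string &schema_name, const string &table_name) {
	// GetBaseUrl() now only sets host and version; prefix/warehouse were dropped.
	IRCEndpointBuilder url = catalog.GetBaseUrl();
	url.AddPathComponent("namespaces");
	url.AddPathComponent(schema_name);
	url.AddPathComponent("tables");
	url.AddPathComponent(table_name);
	// New in this change: query parameters can be appended to the request URL.
	url.AddQueryParameter("snapshots", "all"); // example value only
	return url.GetURL();
}

The resulting string would then be issued as a GET against the catalog, analogous to the /v1/namespaces/default/tables/... requests captured in test/sql/local/irc/iceberg_catalog_read.test above.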