diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..f67716173 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +env: + ICEBERG_HOME: /tmp/iceberg + +jobs: + build: + name: ${{ matrix.compiler }} / Ubuntu 24.04 + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + include: + - compiler: gcc-14 + cc: gcc-14 + cxx: g++-14 + - compiler: clang-20 + cc: clang-20 + cxx: clang++-20 + steps: + - uses: actions/checkout@v6 + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev ninja-build + - name: Install clang-20 + if: matrix.compiler == 'clang-20' + run: | + wget -qO /tmp/llvm.sh https://apt.llvm.org/llvm.sh + chmod +x /tmp/llvm.sh + sudo /tmp/llvm.sh 20 + - name: Build and test + env: + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + run: ci/scripts/build_iceberg.sh $(pwd) ON diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8b32eb749..8b5ab67c8 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -167,8 +167,13 @@ function(resolve_arrow_dependency) else() set(ARROW_VENDORED FALSE) find_package(Arrow CONFIG REQUIRED) - find_package(Parquet CONFIG REQUIRED) - list(APPEND ICEBERG_SYSTEM_DEPENDENCIES Arrow Parquet) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES Arrow) + # Parquet may be bundled as a component of Arrow (e.g. Conan's arrow recipe). + # Only do a separate find_package if Arrow didn't already provide the target. 
+ if(NOT TARGET Parquet::parquet_static AND NOT TARGET Parquet::parquet_shared) + find_package(Parquet CONFIG REQUIRED) + endif() + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES Parquet) endif() set(ICEBERG_SYSTEM_DEPENDENCIES @@ -179,6 +184,42 @@ function(resolve_arrow_dependency) PARENT_SCOPE) endfunction() +# ---------------------------------------------------------------------- +# fmt (required by Apache Avro) +# +# We fetch fmt explicitly before avro-cpp to ensure a compatible version. +# Avro-cpp bundles fmt 10.2.1 which has consteval bugs with newer compilers. + +function(resolve_fmt_dependency) + prepare_fetchcontent() + + set(FMT_INSTALL OFF) + + fetchcontent_declare(fmt + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG 12.1.0 + FIND_PACKAGE_ARGS + NAMES + fmt + CONFIG) + fetchcontent_makeavailable(fmt) + + if(fmt_SOURCE_DIR) + set(FMT_VENDORED TRUE) + else() + set(FMT_VENDORED FALSE) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES fmt) + endif() + + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) + set(FMT_VENDORED + ${FMT_VENDORED} + PARENT_SCOPE) +endfunction() + # ---------------------------------------------------------------------- # Apache Avro @@ -524,6 +565,7 @@ resolve_nlohmann_json_dependency() if(ICEBERG_BUILD_BUNDLE) resolve_arrow_dependency() + resolve_fmt_dependency() resolve_avro_dependency() resolve_zstd_dependency() endif() diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 000000000..f5f280294 --- /dev/null +++ b/conanfile.py @@ -0,0 +1,164 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os + +from conan import ConanFile +from conan.tools.cmake import CMake, CMakeDeps, CMakeToolchain, cmake_layout +from conan.tools.files import ( + copy, + rm, + rmdir, +) + +required_conan_version = ">=2.1.0" + + +class IcebergCppConan(ConanFile): + name = "iceberg-cpp" + description = "Apache Iceberg C++ client library" + license = "Apache-2.0" + homepage = "https://github.com/redpanda-data/iceberg-cpp" + url = "https://github.com/redpanda-data/iceberg-cpp" + package_type = "static-library" + settings = "os", "arch", "compiler", "build_type" + options = {"fPIC": [True, False]} + default_options = {"fPIC": True} + + def export_sources(self): + for pattern in [ + "CMakeLists.txt", + "LICENSE", + "NOTICE", + "cmake_modules/*", + "src/*", + ]: + copy(self, pattern, src=self.recipe_folder, dst=self.export_sources_folder) + + def config_options(self): + if self.settings.os == "Windows": + del self.options.fPIC + + def configure(self): + self.options["arrow/*"].with_json = True + + def layout(self): + cmake_layout(self) + + def requirements(self): + self.requires("arrow/22.0.0", transitive_headers=True) + self.requires("libcurl/[>=7.78 <9]") + self.requires("openssl/[>=1.1 <4]") + self.requires("zlib/[>=1.2.11 <2]") + self.requires("snappy/[>=1.1 <2]") + self.requires("zstd/[>=1.5 <2]") + + def generate(self): + tc = CMakeToolchain(self) + tc.variables["ICEBERG_BUILD_BUNDLE"] = True + tc.variables["ICEBERG_BUILD_TESTS"] = False + tc.variables["ICEBERG_BUILD_REST"] = True + tc.variables["ICEBERG_BUILD_STATIC"] = True +
tc.variables["ICEBERG_BUILD_SHARED"] = False + tc.variables["CMAKE_FIND_USE_PACKAGE_REGISTRY"] = False + # GCC false positive: inlining std::expected destructor in + # json_internal.cc makes GCC think Error::~Error() frees a non-heap + # pointer. Same family as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118867 + if self.settings.compiler == "gcc": + tc.extra_cxxflags = ["-Wno-free-nonheap-object"] + tc.generate() + deps = CMakeDeps(self) + deps.generate() + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + copy( + self, + "LICENSE", + src=self.source_folder, + dst=os.path.join(self.package_folder, "licenses"), + ) + cmake = CMake(self) + cmake.install() + # Remove vendored Arrow/Parquet — consumers use Conan's Arrow package. + rm( + self, + "libiceberg_vendored_arrow.a", + os.path.join(self.package_folder, "lib"), + ) + rm( + self, + "libiceberg_vendored_parquet.a", + os.path.join(self.package_folder, "lib"), + ) + rm( + self, + "libarrow_bundled_dependencies.a", + os.path.join(self.package_folder, "lib"), + ) + rmdir(self, os.path.join(self.package_folder, "lib", "cmake")) + + def package_info(self): + # Vendored libraries (built and packaged by iceberg-cpp) + self.cpp_info.components["vendored_nanoarrow"].libs = [ + "iceberg_vendored_nanoarrow", + ] + + self.cpp_info.components["vendored_croaring"].libs = [ + "iceberg_vendored_croaring", + ] + + self.cpp_info.components["vendored_avrocpp"].libs = [ + "iceberg_vendored_avrocpp", + ] + self.cpp_info.components["vendored_avrocpp"].requires = [ + "zlib::zlib", + "snappy::snappy", + "zstd::zstd", + ] + + self.cpp_info.components["vendored_cpr"].libs = ["iceberg_vendored_cpr"] + self.cpp_info.components["vendored_cpr"].requires = [ + "libcurl::libcurl", + "openssl::openssl", + ] + + # Core iceberg library + self.cpp_info.components["iceberg"].libs = ["iceberg"] + self.cpp_info.components["iceberg"].requires = [ + "vendored_nanoarrow", + "vendored_croaring", + ] + + 
# REST catalog support + self.cpp_info.components["iceberg_rest"].libs = ["iceberg_rest"] + self.cpp_info.components["iceberg_rest"].requires = [ + "iceberg", + "vendored_cpr", + ] + + # Bundle (Arrow/Parquet integration) + self.cpp_info.components["iceberg_bundle"].libs = ["iceberg_bundle"] + self.cpp_info.components["iceberg_bundle"].requires = [ + "iceberg", + "arrow::libarrow", + "arrow::libparquet", + "vendored_avrocpp", + ] diff --git a/src/iceberg/arrow/CMakeLists.txt b/src/iceberg/arrow/CMakeLists.txt index 3416d5e95..60345f915 100644 --- a/src/iceberg/arrow/CMakeLists.txt +++ b/src/iceberg/arrow/CMakeLists.txt @@ -16,3 +16,7 @@ # under the License. iceberg_install_all_headers(iceberg/arrow) + +# Also install internal header needed by consumers wrapping arrow::fs::FileSystem as iceberg FileIO. +install(FILES arrow_fs_file_io_internal.h + DESTINATION "${ICEBERG_INSTALL_INCLUDEDIR}/iceberg/arrow") diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 2e383b0ae..d45cc5b7c 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -81,6 +81,22 @@ Result BuildHeaders( return cpr::Header(headers.begin(), headers.end()); } +cpr::SslOptions BuildSslOptions(const SslConfig& config) { + cpr::SslOptions opts; + opts.verify_host = config.verify; + opts.verify_peer = config.verify; + if (!config.ca_info.empty()) { + opts.ca_info = config.ca_info; + } + if (!config.ca_path.empty()) { + opts.ca_path = config.ca_path; + } + if (!config.crl_file.empty()) { + opts.crl_file = config.crl_file; + } + return opts; +} + /// \brief Converts a map of string key-value pairs to cpr::Parameters. cpr::Parameters GetParameters( const std::unordered_map& params) { @@ -101,11 +117,18 @@ bool IsSuccessful(int32_t status_code) { /// \brief Builds a default ErrorResponse when the response body cannot be parsed. 
ErrorResponse BuildDefaultErrorResponse(const cpr::Response& response) { + std::string message; + if (response.error) { + message = response.error.message; + } else if (!response.reason.empty()) { + message = response.reason; + } else { + message = GetStandardReasonPhrase(response.status_code); + } return { .code = static_cast(response.status_code), .type = std::string(kRestExceptionType), - .message = !response.reason.empty() ? response.reason - : GetStandardReasonPhrase(response.status_code), + .message = std::move(message), }; } @@ -133,8 +156,10 @@ Status HandleFailureResponse(const cpr::Response& response, } // namespace -HttpClient::HttpClient(std::unordered_map default_headers) +HttpClient::HttpClient(std::unordered_map default_headers, + SslConfig ssl_config) : default_headers_{std::move(default_headers)}, + ssl_config_{std::move(ssl_config)}, connection_pool_{std::make_unique()} { // Set default Content-Type for all requests (including GET/HEAD/DELETE). // Many systems require that content type is set regardless and will fail, @@ -151,8 +176,8 @@ Result HttpClient::Get( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, BuildHeaders(headers, default_headers_, session)); - cpr::Response response = - cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); + cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, + BuildSslOptions(ssl_config_), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -166,8 +191,8 @@ Result HttpClient::Post( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, BuildHeaders(headers, default_headers_, session)); - cpr::Response response = - cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, *connection_pool_); + cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, + 
BuildSslOptions(ssl_config_), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -191,7 +216,7 @@ Result HttpClient::PostForm( } cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Payload(pair_list.begin(), pair_list.end()), - all_headers, *connection_pool_); + all_headers, BuildSslOptions(ssl_config_), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -204,7 +229,8 @@ Result HttpClient::Head( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, BuildHeaders(headers, default_headers_, session)); - cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, *connection_pool_); + cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, + BuildSslOptions(ssl_config_), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -218,8 +244,8 @@ Result HttpClient::Delete( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, BuildHeaders(headers, default_headers_, session)); - cpr::Response response = - cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); + cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, + BuildSslOptions(ssl_config_), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; diff --git a/src/iceberg/catalog/rest/http_client.h b/src/iceberg/catalog/rest/http_client.h index ea9c10a39..52286932f 100644 --- a/src/iceberg/catalog/rest/http_client.h +++ b/src/iceberg/catalog/rest/http_client.h @@ -67,10 +67,19 @@ class ICEBERG_REST_EXPORT HttpResponse { std::unique_ptr impl_; }; +/// \brief SSL/TLS configuration for the HTTP client. 
+struct ICEBERG_REST_EXPORT SslConfig { + bool verify = true; + std::string ca_info; + std::string ca_path; + std::string crl_file; +}; + /// \brief HTTP client for making requests to Iceberg REST Catalog API. class ICEBERG_REST_EXPORT HttpClient { public: - explicit HttpClient(std::unordered_map default_headers = {}); + explicit HttpClient(std::unordered_map default_headers = {}, + SslConfig ssl_config = {}); ~HttpClient(); HttpClient(const HttpClient&) = delete; @@ -112,6 +121,7 @@ class ICEBERG_REST_EXPORT HttpClient { private: std::unordered_map default_headers_; + SslConfig ssl_config_; std::unique_ptr connection_pool_; }; diff --git a/src/iceberg/catalog/rest/rest_catalog.cc b/src/iceberg/catalog/rest/rest_catalog.cc index ebb03bf84..a63cc5090 100644 --- a/src/iceberg/catalog/rest/rest_catalog.cc +++ b/src/iceberg/catalog/rest/rest_catalog.cc @@ -65,12 +65,30 @@ std::unordered_set GetDefaultEndpoints() { }; } +SslConfig ExtractSslConfig(const std::unordered_map& props) { + SslConfig ssl; + if (auto it = props.find("ssl.verify"); it != props.end()) { + ssl.verify = (it->second != "false"); + } + if (auto it = props.find("ssl.ca-info"); it != props.end()) { + ssl.ca_info = it->second; + } + if (auto it = props.find("ssl.ca-path"); it != props.end()) { + ssl.ca_path = it->second; + } + if (auto it = props.find("ssl.crl-file"); it != props.end()) { + ssl.crl_file = it->second; + } + return ssl; +} + /// \brief Fetch server configuration from the REST catalog server. Result FetchServerConfig(const ResourcePaths& paths, const RestCatalogProperties& current_config, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto config_path, paths.Config()); - HttpClient client(current_config.ExtractHeaders()); + HttpClient client(current_config.ExtractHeaders(), + ExtractSslConfig(current_config.configs())); // Send the client's warehouse location to the service to keep in sync. 
// This is needed for cases where the warehouse is configured client side, but may @@ -137,7 +155,8 @@ Result> RestCatalog::Make( config.Get(RestCatalogProperties::kNamespaceSeparator))); // Create init session for fetching server configuration - HttpClient init_client(config.ExtractHeaders()); + auto ssl = ExtractSslConfig(config.configs()); + HttpClient init_client(config.ExtractHeaders(), ssl); ICEBERG_ASSIGN_OR_RAISE(auto init_session, auth_manager->InitSession(init_client, config.configs())); ICEBERG_ASSIGN_OR_RAISE(auto server_config, @@ -168,7 +187,8 @@ Result> RestCatalog::Make( // Get snapshot loading mode ICEBERG_ASSIGN_OR_RAISE(auto snapshot_mode, final_config.SnapshotLoadingMode()); - auto client = std::make_unique(final_config.ExtractHeaders()); + auto final_ssl = ExtractSslConfig(final_config.configs()); + auto client = std::make_unique(final_config.ExtractHeaders(), final_ssl); ICEBERG_ASSIGN_OR_RAISE(auto catalog_session, auth_manager->CatalogSession(*client, final_config.configs())); diff --git a/src/iceberg/delete_file_index.cc b/src/iceberg/delete_file_index.cc index 7c8c35032..4c730939c 100644 --- a/src/iceberg/delete_file_index.cc +++ b/src/iceberg/delete_file_index.cc @@ -182,15 +182,15 @@ std::vector> PositionDeletes::Filter(int64_t seq) { if (iter == seqs_.end()) { return {}; } - return files_ | std::views::drop(iter - seqs_.begin()) | - std::views::transform(&ManifestEntry::data_file) | - std::ranges::to>>(); + return std::ranges::to>>( + files_ | std::views::drop(iter - seqs_.begin()) | + std::views::transform(&ManifestEntry::data_file)); } std::vector> PositionDeletes::ReferencedDeleteFiles() { IndexIfNeeded(); - return files_ | std::views::transform(&ManifestEntry::data_file) | - std::ranges::to>>(); + return std::ranges::to>>( + files_ | std::views::transform(&ManifestEntry::data_file)); } void PositionDeletes::IndexIfNeeded() { @@ -202,9 +202,9 @@ void PositionDeletes::IndexIfNeeded() { std::ranges::sort(files_, std::ranges::less{}, 
&ManifestEntry::sequence_number); // Build sequence number array for binary search - seqs_ = files_ | - std::views::transform([](const auto& e) { return e.sequence_number.value(); }) | - std::ranges::to>(); + seqs_ = std::ranges::to>( + files_ | + std::views::transform([](const auto& e) { return e.sequence_number.value(); })); indexed_ = true; } @@ -243,9 +243,8 @@ Result>> EqualityDeletes::Filter( std::vector> EqualityDeletes::ReferencedDeleteFiles() { IndexIfNeeded(); - return files_ | - std::views::transform([](const auto& f) { return f.wrapped.data_file; }) | - std::ranges::to>>(); + return std::ranges::to>>( + files_ | std::views::transform([](const auto& f) { return f.wrapped.data_file; })); } void EqualityDeletes::IndexIfNeeded() { @@ -258,8 +257,8 @@ void EqualityDeletes::IndexIfNeeded() { &EqualityDeleteFile::apply_sequence_number); // Build sequence number array for binary search - seqs_ = files_ | std::views::transform(&EqualityDeleteFile::apply_sequence_number) | - std::ranges::to>(); + seqs_ = std::ranges::to>( + files_ | std::views::transform(&EqualityDeleteFile::apply_sequence_number)); indexed_ = true; } diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 2d8c22255..cab1096f9 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -209,6 +209,10 @@ constexpr std::string_view kSnapshotIds = "snapshot-ids"; constexpr std::string_view kRefName = "ref-name"; constexpr std::string_view kUpdates = "updates"; constexpr std::string_view kRemovals = "removals"; +// The Iceberg REST spec uses "ref" (not "ref-name") for the +// assert-ref-snapshot-id requirement. "ref-name" is correct only for +// table-update actions (set-snapshot-ref, remove-snapshot-ref). 
+constexpr std::string_view kRef = "ref"; // TableRequirement type constants constexpr std::string_view kRequirementAssertDoesNotExist = "assert-create"; @@ -1491,7 +1495,7 @@ nlohmann::json ToJson(const TableRequirement& requirement) { const auto& r = internal::checked_cast(requirement); json[kType] = kRequirementAssertRefSnapshotID; - json[kRefName] = r.ref_name(); + json[kRef] = r.ref_name(); if (r.snapshot_id().has_value()) { json[kSnapshotId] = r.snapshot_id().value(); } else { @@ -1688,7 +1692,7 @@ Result> TableRequirementFromJson( return std::make_unique(std::move(uuid)); } if (type == kRequirementAssertRefSnapshotID) { - ICEBERG_ASSIGN_OR_RAISE(auto ref_name, GetJsonValue(json, kRefName)); + ICEBERG_ASSIGN_OR_RAISE(auto ref_name, GetJsonValue(json, kRef)); ICEBERG_ASSIGN_OR_RAISE(auto snapshot_id_opt, GetJsonValueOptional(json, kSnapshotId)); return std::make_unique(std::move(ref_name), diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc index b317efb90..faf8e8d43 100644 --- a/src/iceberg/sort_order.cc +++ b/src/iceberg/sort_order.cc @@ -134,16 +134,16 @@ Result> SortOrder::Make(int32_t sort_id, std::unordered_set SortOrder::OrderPreservingSortedColumns( const Schema& schema, const SortOrder& order) { - return order.fields() | std::views::filter([&schema](const SortField& field) { - return field.transform()->PreservesOrder(); - }) | - std::views::transform([&schema](const SortField& field) { - return schema.FindColumnNameById(field.source_id()) - .value_or(std::nullopt) - .value_or(""); - }) | - std::views::filter([](std::string_view name) { return !name.empty(); }) | - std::ranges::to>(); + return std::ranges::to>( + order.fields() | std::views::filter([&schema](const SortField& field) { + return field.transform()->PreservesOrder(); + }) | + std::views::transform([&schema](const SortField& field) { + return schema.FindColumnNameById(field.source_id()) + .value_or(std::nullopt) + .value_or(""); + }) | + std::views::filter([](std::string_view 
name) { return !name.empty(); })); } } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index d3b5629c8..a9c4479f5 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -377,34 +377,34 @@ Result TableMetadataCache::GetSnapshotsById Result TableMetadataCache::InitSchemasMap( const TableMetadata* metadata) { - return metadata->schemas | std::views::transform([](const auto& schema) { - return std::make_pair(schema->schema_id(), schema); - }) | - std::ranges::to(); + return std::ranges::to(metadata->schemas | + std::views::transform([](const auto& schema) { + return std::make_pair(schema->schema_id(), schema); + })); } Result TableMetadataCache::InitPartitionSpecsMap( const TableMetadata* metadata) { - return metadata->partition_specs | std::views::transform([](const auto& spec) { - return std::make_pair(spec->spec_id(), spec); - }) | - std::ranges::to(); + return std::ranges::to( + metadata->partition_specs | std::views::transform([](const auto& spec) { + return std::make_pair(spec->spec_id(), spec); + })); } Result TableMetadataCache::InitSortOrdersMap( const TableMetadata* metadata) { - return metadata->sort_orders | std::views::transform([](const auto& order) { - return std::make_pair(order->order_id(), order); - }) | - std::ranges::to(); + return std::ranges::to(metadata->sort_orders | + std::views::transform([](const auto& order) { + return std::make_pair(order->order_id(), order); + })); } Result TableMetadataCache::InitSnapshotMap( const TableMetadata* metadata) { - return metadata->snapshots | std::views::transform([](const auto& snapshot) { - return std::make_pair(snapshot->snapshot_id, snapshot); - }) | - std::ranges::to(); + return std::ranges::to( + metadata->snapshots | std::views::transform([](const auto& snapshot) { + return std::make_pair(snapshot->snapshot_id, snapshot); + })); } Result TableMetadataUtil::Codec::FromString( @@ -499,7 +499,7 @@ void 
TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const TableMetada metadata.properties.Get(TableProperties::kMetadataDeleteAfterCommitEnabled); if (delete_after_commit) { auto current_files = - metadata.metadata_log | std::ranges::to>(); + std::ranges::to>(metadata.metadata_log); std::ranges::for_each( base->metadata_log | std::views::filter([&current_files](const auto& entry) { return !current_files.contains(entry); @@ -862,7 +862,7 @@ Result TableMetadataBuilder::Impl::AddPartitionSpec(const PartitionSpec ICEBERG_ASSIGN_OR_RAISE( std::shared_ptr new_spec, - PartitionSpec::Make(new_spec_id, spec.fields() | std::ranges::to())); + PartitionSpec::Make(new_spec_id, std::ranges::to(spec.fields()))); metadata_.last_partition_id = std::max(metadata_.last_partition_id, new_spec->last_assigned_field_id()); metadata_.partition_specs.push_back(new_spec); @@ -933,7 +933,7 @@ Status TableMetadataBuilder::Impl::SetCurrentSchema(int32_t schema_id) { ICEBERG_ASSIGN_OR_RAISE( auto updated_spec, PartitionSpec::Make(partition_spec->spec_id(), - partition_spec->fields() | std::ranges::to())); + std::ranges::to(partition_spec->fields()))); ICEBERG_RETURN_UNEXPECTED( PartitionSpec::ValidatePartitionName(*schema, *updated_spec)); @@ -953,7 +953,7 @@ Status TableMetadataBuilder::Impl::SetCurrentSchema(int32_t schema_id) { ICEBERG_ASSIGN_OR_RAISE( auto updated_order, SortOrder::Make(sort_order->order_id(), - sort_order->fields() | std::ranges::to())); + std::ranges::to(sort_order->fields()))); updated_orders.push_back(std::move(updated_order)); } metadata_.sort_orders = std::move(updated_orders); @@ -983,10 +983,10 @@ Status TableMetadataBuilder::Impl::RemoveSchemas( "Cannot remove current schema: {}", current_schema_id); if (!schema_ids.empty()) { - metadata_.schemas = metadata_.schemas | std::views::filter([&](const auto& schema) { - return !schema_ids.contains(schema->schema_id()); - }) | - std::ranges::to>>(); + metadata_.schemas = std::ranges::to>>( + metadata_.schemas |
std::views::filter([&](const auto& schema) { + return !schema_ids.contains(schema->schema_id()); + })); changes_.push_back(std::make_unique(schema_ids)); } @@ -1023,7 +1023,7 @@ Result TableMetadataBuilder::Impl::AddSchema(const Schema& schema, metadata_.last_column_id = new_last_column_id; ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr new_schema, - Schema::Make(schema.fields() | std::ranges::to(), + Schema::Make(std::ranges::to(schema.fields()), new_schema_id, schema.IdentifierFieldIds())) if (!schema_found) { @@ -1479,10 +1479,10 @@ Status TableMetadataBuilder::Impl::RemovePartitionSpecs( "Cannot remove the default partition spec"); metadata_.partition_specs = - metadata_.partition_specs | std::views::filter([&](const auto& spec) { - return !spec_ids_to_remove.contains(spec->spec_id()); - }) | - std::ranges::to>>(); + std::ranges::to>>( + metadata_.partition_specs | std::views::filter([&](const auto& spec) { + return !spec_ids_to_remove.contains(spec->spec_id()); + })); changes_.push_back(std::make_unique(spec_ids)); return {}; diff --git a/src/iceberg/test/delete_file_index_test.cc b/src/iceberg/test/delete_file_index_test.cc index b99a2816b..3deb5839e 100644 --- a/src/iceberg/test/delete_file_index_test.cc +++ b/src/iceberg/test/delete_file_index_test.cc @@ -212,9 +212,8 @@ class DeleteFileIndexTest : public testing::TestWithParam { // Helper to extract paths from delete files for comparison static std::vector GetPaths( const std::vector>& files) { - return std::ranges::transform_view(files, - [](const auto& f) { return f->file_path; }) | - std::ranges::to>(); + return std::ranges::to>( + std::ranges::transform_view(files, [](const auto& f) { return f->file_path; })); } }; diff --git a/src/iceberg/test/json_serde_test.cc b/src/iceberg/test/json_serde_test.cc index f019375d3..df9da9c39 100644 --- a/src/iceberg/test/json_serde_test.cc +++ b/src/iceberg/test/json_serde_test.cc @@ -681,7 +681,7 @@ TEST(TableRequirementJsonTest, TableRequirementAssertUUID) { 
TEST(TableRequirementJsonTest, TableRequirementAssertRefSnapshotID) { table::AssertRefSnapshotID req("main", 123456789); nlohmann::json expected = - R"({"type":"assert-ref-snapshot-id","ref-name":"main","snapshot-id":123456789})"_json; + R"({"type":"assert-ref-snapshot-id","ref":"main","snapshot-id":123456789})"_json; EXPECT_EQ(ToJson(req), expected); auto parsed = TableRequirementFromJson(expected); @@ -693,7 +693,7 @@ TEST(TableRequirementJsonTest, TableRequirementAssertRefSnapshotID) { TEST(TableRequirementJsonTest, TableRequirementAssertRefSnapshotIDWithNull) { table::AssertRefSnapshotID req("main", std::nullopt); nlohmann::json expected = - R"({"type":"assert-ref-snapshot-id","ref-name":"main","snapshot-id":null})"_json; + R"({"type":"assert-ref-snapshot-id","ref":"main","snapshot-id":null})"_json; EXPECT_EQ(ToJson(req), expected); auto parsed = TableRequirementFromJson(expected); diff --git a/src/iceberg/test/manifest_group_test.cc b/src/iceberg/test/manifest_group_test.cc index 34ff9993b..dab9ec235 100644 --- a/src/iceberg/test/manifest_group_test.cc +++ b/src/iceberg/test/manifest_group_test.cc @@ -215,18 +215,16 @@ class ManifestGroupTest : public testing::TestWithParam { static std::vector GetPaths( const std::vector>& tasks) { - return tasks | std::views::transform([](const auto& task) { - return task->data_file()->file_path; - }) | - std::ranges::to>(); + return std::ranges::to>( + tasks | std::views::transform( + [](const auto& task) { return task->data_file()->file_path; })); } static std::vector GetEntryPaths( const std::vector& entries) { - return entries | std::views::transform([](const auto& entry) { - return entry.data_file->file_path; - }) | - std::ranges::to>(); + return std::ranges::to>( + entries | std::views::transform( + [](const auto& entry) { return entry.data_file->file_path; })); } std::shared_ptr file_io_; diff --git a/src/iceberg/test/scan_test_base.h b/src/iceberg/test/scan_test_base.h index 65a4e0531..eda5597f8 100644 --- 
a/src/iceberg/test/scan_test_base.h +++ b/src/iceberg/test/scan_test_base.h @@ -160,10 +160,9 @@ class ScanTestBase : public testing::TestWithParam { /// \brief Extract file paths from scan tasks. static std::vector GetPaths( const std::vector>& tasks) { - return tasks | std::views::transform([](const auto& task) { - return task->data_file()->file_path; - }) | - std::ranges::to>(); + return std::ranges::to>( + tasks | std::views::transform( + [](const auto& task) { return task->data_file()->file_path; })); } /// \brief Create table metadata with the given snapshots. diff --git a/src/iceberg/test/table_metadata_builder_test.cc b/src/iceberg/test/table_metadata_builder_test.cc index 4146cb01f..3c8465827 100644 --- a/src/iceberg/test/table_metadata_builder_test.cc +++ b/src/iceberg/test/table_metadata_builder_test.cc @@ -104,7 +104,7 @@ TEST(TableMetadataTest, Make) { auto metadata, TableMetadata::Make(*Schema, *spec, *order, "s3://bucket/test", {})); // Check schema fields ASSERT_EQ(1, metadata->schemas.size()); - auto fields = metadata->schemas[0]->fields() | std::ranges::to(); + auto fields = std::ranges::to(metadata->schemas[0]->fields()); ASSERT_EQ(3, fields.size()); EXPECT_EQ(1, fields[0].field_id()); EXPECT_EQ("id", fields[0].name()); @@ -119,8 +119,7 @@ TEST(TableMetadataTest, Make) { // Check partition spec ASSERT_EQ(1, metadata->partition_specs.size()); EXPECT_EQ(PartitionSpec::kInitialSpecId, metadata->partition_specs[0]->spec_id()); - auto spec_fields = - metadata->partition_specs[0]->fields() | std::ranges::to(); + auto spec_fields = std::ranges::to(metadata->partition_specs[0]->fields()); ASSERT_EQ(1, spec_fields.size()); EXPECT_EQ(PartitionSpec::kInvalidPartitionFieldId + 1, spec_fields[0].field_id()); EXPECT_EQ(2, spec_fields[0].source_id()); @@ -129,7 +128,7 @@ TEST(TableMetadataTest, Make) { // Check sort order ASSERT_EQ(1, metadata->sort_orders.size()); EXPECT_EQ(SortOrder::kInitialSortOrderId, metadata->sort_orders[0]->order_id()); - auto 
order_fields = metadata->sort_orders[0]->fields() | std::ranges::to(); + auto order_fields = std::ranges::to(metadata->sort_orders[0]->fields()); ASSERT_EQ(1, order_fields.size()); EXPECT_EQ(3, order_fields[0].source_id()); EXPECT_EQ(SortDirection::kAscending, order_fields[0].direction()); diff --git a/src/iceberg/update/snapshot_update.cc b/src/iceberg/update/snapshot_update.cc index 3e5792667..5aee2eba2 100644 --- a/src/iceberg/update/snapshot_update.cc +++ b/src/iceberg/update/snapshot_update.cc @@ -311,10 +311,9 @@ Status SnapshotUpdate::Finalize(std::optional commit_error) { "Staged snapshot is null during finalize after commit"); auto cached_snapshot = SnapshotCache(staged_snapshot_.get()); ICEBERG_ASSIGN_OR_RAISE(auto manifests, cached_snapshot.Manifests(ctx_->table->io())); - CleanUncommitted(manifests | std::views::transform([](const auto& manifest) { - return manifest.manifest_path; - }) | - std::ranges::to>()); + CleanUncommitted(std::ranges::to>( + manifests | std::views::transform( + [](const auto& manifest) { return manifest.manifest_path; }))); } // Also clean up unused manifest lists created by multiple attempts diff --git a/src/iceberg/update/update_schema.cc b/src/iceberg/update/update_schema.cc index 1f35781fa..ba1a02929 100644 --- a/src/iceberg/update/update_schema.cc +++ b/src/iceberg/update/update_schema.cc @@ -542,7 +542,7 @@ UpdateSchema& UpdateSchema::UnionByNameWith(std::shared_ptr new_schema) UpdateSchema& UpdateSchema::SetIdentifierFields( const std::span& names) { - identifier_field_names_ = names | std::ranges::to>(); + identifier_field_names_ = std::ranges::to>(names); return *this; } @@ -589,7 +589,7 @@ Result UpdateSchema::Apply() { fresh_identifier_ids.push_back(field_opt->get().field_id()); } - auto new_fields = temp_schema->fields() | std::ranges::to>(); + auto new_fields = std::ranges::to>(temp_schema->fields()); ICEBERG_ASSIGN_OR_RAISE( auto new_schema, Schema::Make(std::move(new_fields), schema_->schema_id(), 
fresh_identifier_ids)); diff --git a/src/iceberg/util/projection_util_internal.h b/src/iceberg/util/projection_util_internal.h index df4fe9789..53a1b13e9 100644 --- a/src/iceberg/util/projection_util_internal.h +++ b/src/iceberg/util/projection_util_internal.h @@ -351,10 +351,8 @@ class ProjectionUtil { } } if (has_negative_value) { - auto values = - std::views::transform(value_set, - [](int32_t value) { return Literal::Int(value); }) | - std::ranges::to(); + auto values = std::ranges::to(std::views::transform( + value_set, [](int32_t value) { return Literal::Int(value); })); return UnboundPredicateImpl::Make(Expression::Operation::kIn, std::move(projected->term()), std::move(values)); @@ -455,10 +453,8 @@ class ProjectionUtil { } } if (has_negative_value) { - auto values = - std::views::transform(value_set, - [](int32_t value) { return Literal::Int(value); }) | - std::ranges::to(); + auto values = std::ranges::to(std::views::transform( + value_set, [](int32_t value) { return Literal::Int(value); })); return UnboundPredicateImpl::Make(Expression::Operation::kNotIn, std::move(projected->term()), std::move(values)); diff --git a/src/iceberg/util/snapshot_util.cc b/src/iceberg/util/snapshot_util.cc index 49019408b..f4ae41127 100644 --- a/src/iceberg/util/snapshot_util.cc +++ b/src/iceberg/util/snapshot_util.cc @@ -288,11 +288,10 @@ Result>> SnapshotUtil::AncestorsOf( Result> SnapshotUtil::ToIds( const std::vector>& snapshots) { - return snapshots | - std::views::filter([](const auto& snapshot) { return snapshot != nullptr; }) | - std::views::transform( - [](const auto& snapshot) { return snapshot->snapshot_id; }) | - std::ranges::to>(); + return std::ranges::to>( + snapshots | + std::views::filter([](const auto& snapshot) { return snapshot != nullptr; }) | + std::views::transform([](const auto& snapshot) { return snapshot->snapshot_id; })); } Result> SnapshotUtil::SnapshotAfter(const Table& table, diff --git a/src/iceberg/util/string_util.h 
b/src/iceberg/util/string_util.h index 36dfba30f..2a334fb26 100644 --- a/src/iceberg/util/string_util.h +++ b/src/iceberg/util/string_util.h @@ -41,13 +41,17 @@ concept FromChars = requires(const char* p, T& v) { std::from_chars(p, p, v); }; class ICEBERG_EXPORT StringUtils { public: static std::string ToLower(std::string_view str) { - return str | std::ranges::views::transform([](char c) { return std::tolower(c); }) | - std::ranges::to(); + std::string result(str); + std::ranges::transform(result, result.begin(), + [](unsigned char c) { return std::tolower(c); }); + return result; } static std::string ToUpper(std::string_view str) { - return str | std::ranges::views::transform([](char c) { return std::toupper(c); }) | - std::ranges::to(); + std::string result(str); + std::ranges::transform(result, result.begin(), + [](unsigned char c) { return std::toupper(c); }); + return result; } static bool EqualsIgnoreCase(std::string_view lhs, std::string_view rhs) { diff --git a/src/iceberg/util/type_util.cc b/src/iceberg/util/type_util.cc index c6b9bb3ed..f539f5ea2 100644 --- a/src/iceberg/util/type_util.cc +++ b/src/iceberg/util/type_util.cc @@ -357,10 +357,9 @@ std::shared_ptr AssignFreshIdVisitor::Visit( } std::shared_ptr AssignFreshIdVisitor::Visit(const StructType& type) const { - auto fresh_ids = + auto fresh_ids = std::ranges::to>( type.fields() | - std::views::transform([&](const auto& /* unused */) { return next_id_(); }) | - std::ranges::to>(); + std::views::transform([&](const auto& /* unused */) { return next_id_(); })); std::vector fresh_fields; for (size_t i = 0; i < type.fields().size(); ++i) { const auto& field = type.fields()[i]; @@ -402,7 +401,7 @@ Result> AssignFreshIds(int32_t schema_id, const Schema& auto fresh_type = AssignFreshIdVisitor(std::move(next_id)) .Visit(internal::checked_cast(schema)); std::vector fields = - fresh_type->fields() | std::ranges::to>(); + std::ranges::to>(fresh_type->fields()); ICEBERG_ASSIGN_OR_RAISE(auto 
identifier_field_names, schema.IdentifierFieldNames()); return Schema::Make(std::move(fields), schema_id, identifier_field_names); }