diff --git a/.devcontainer/clang/Dockerfile.clang b/.devcontainer/clang/Dockerfile.clang new file mode 100644 index 000000000..8287b0ed2 --- /dev/null +++ b/.devcontainer/clang/Dockerfile.clang @@ -0,0 +1,140 @@ +FROM debian:bookworm + +ENV CMAKE_VERSION=3.31.8 +ENV MOLD_VERSION=2.40.4 + +ARG DEB_REGION="" +ARG https_proxy="" +ARG no_proxy="" + +# check if deb region is not empty string. If it is, replace the default debian repository with the one for the region. +RUN if [ "$DEB_REGION" != "" ]; then \ + sed -i "s|http://deb.debian.org/debian|http://ftp.${DEB_REGION}.debian.org/debian|g" /etc/apt/sources.list.d/debian.sources; \ +fi + +RUN apt-get update && \ + apt-get install -y \ + curl \ + tar + +RUN arch=$(arch) && \ + https_proxy=${https_proxy} \ + curl -L -# -o /tmp/cmake.tar.gz https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-${arch}.tar.gz && \ + tar -xzvf /tmp/cmake.tar.gz -C /opt && \ + ln -s /opt/cmake-$CMAKE_VERSION-linux-${arch}/bin/cmake /usr/local/bin/cmake && \ + ln -s /opt/cmake-$CMAKE_VERSION-linux-${arch}/bin/ctest /usr/local/bin/ctest && \ + rm /tmp/cmake.tar.gz + +RUN apt-get update && apt-get install -y \ + sudo \ + bash \ + vim-tiny \ + vim \ + sudo \ + git \ + gdb \ + python3 \ + python3-pip \ + python3-virtualenv \ + ssh-client \ + ninja-build \ + bison \ + maven \ + net-tools \ + telnet \ + ssh \ + rsync \ + openssh-server \ + lld \ + ccache \ + binutils-dev \ + make \ + automake \ + autoconf \ + htop \ + less \ + man \ + nodejs \ + npm \ + bsdextrautils \ + locales \ + clang-19 \ + clang++-19 \ + && update-alternatives --install /usr/bin/clang clang /usr/bin/clang-19 100 \ + && update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-19 100 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN ln -sf /bin/bash /bin/sh + +RUN https_proxy=${https_proxy} curl -L -O --output-dir /tmp 
https://github.com/rui314/mold/releases/download/v$MOLD_VERSION/mold-$MOLD_VERSION-$(arch)-linux.tar.gz +RUN cd /tmp && \ + sudo tar -C /usr/local --strip-components=1 -xzf mold-$MOLD_VERSION-$(arch)-linux.tar.gz + +# Add mold's ld as the first on the path +ENV PATH=/usr/local/libexec/mold:$PATH + +# License header check tool +RUN https_proxy=${https_proxy} curl -L -# -o /tmp/skywalking-eyes-bin.tgz https://dlcdn.apache.org/skywalking/eyes/0.8.0/skywalking-license-eye-0.8.0-bin.tgz && \ + mkdir -p /opt/skywalking-license-eye && \ + sudo tar -C /opt/skywalking-license-eye --strip-components=1 -xzf /tmp/skywalking-eyes-bin.tgz + +ENV PATH="/opt/skywalking-license-eye/bin/linux:${PATH}" + +RUN export arch=$(arch | sed 's/^aarch64$/arm64/; s/^x86_64$/amd64/') && \ + export https_proxy=${https_proxy} && \ + export filename=go1.25.5.linux-${arch}.tar.gz && \ + curl -L -O --output-dir /tmp https://go.dev/dl/${filename} && \ + sudo rm -rf /usr/local/go && tar -C /usr/local -xzf /tmp/${filename} && \ + rm /tmp/${filename} +ENV PATH="/usr/local/go/bin:${PATH}" + +# Set up default python virtualenv, and make it the default on PATH +RUN virtualenv -p python3 /opt/venv +ENV PATH="/opt/venv/bin:$PATH" +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt && \ + rm -f requirements.txt + +ARG USERNAME=code +ARG USER_UID=1000 +ARG USER_GID=$USER_UID + +RUN groupadd --gid $USER_GID $USERNAME \ + && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ + && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ + && chmod 0440 /etc/sudoers.d/$USERNAME + +USER $USERNAME + +# Configure default conan profile to use the mold linker +RUN arch=$(uname -m) && mkdir -p ~/.conan2/profiles && cat > ~/.conan2/profiles/default <> ~/.bashrc && \ + echo "export LANG=en_US.UTF-8" >> ~/.bashrc && \ + echo "export LANGUAGE=en_US.UTF-8" >> ~/.bashrc + + +ENV SHELL=/bin/bash + +WORKDIR /workspace + +ENTRYPOINT ["/bin/bash", "-c"] diff --git a/.devcontainer/clang/devcontainer.json b/.devcontainer/clang/devcontainer.json new file mode 100644 index 000000000..ff7a8a0cf --- /dev/null +++ b/.devcontainer/clang/devcontainer.json @@ -0,0 +1,44 @@ +{ + "name": "Bolt Clang Development Container", + "build": { + "dockerfile": "Dockerfile.clang", + "context": "../../.github/runners", + "args": { + "DEB_REGION": "", + "https_proxy": "", + "no_proxy": "" + } + }, + "containerEnv": { + "https_proxy": "", + "no_proxy": "", + "SHELL": "/bin/bash" + }, + "remoteUser": "code", + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [], + // Use 'portsAttributes' to set default properties for specific forwarded ports. + "portsAttributes": {}, + // Use 'remoteEnv' to set environment variables that are only available inside the container. + "remoteEnv": {}, + // Use 'mounts' to make files or directories from your local machine available inside the container. 
+ "mounts": [ + "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached", + "source=conan-cache,target=/home/code/.conan2/p", + "source=ccache-cache,target=/home/code/.ccache" + ], + "runArgs": [ + "--security-opt", + "label=disable" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd" + ] + } + }, + // Configure tool-specific properties. + "features": {}, + "postCreateCommand": ".devcontainer/post_create_command.sh" +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e11dd9b6..0faaf45c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,12 @@ elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-D OS_MACOSX) endif() +# Only use compiler-rt when building with Clang on Linux for ARM/AArch64 +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND OS_LINUX AND (ARCH_AARCH64 OR ARCH_ARM)) + add_compile_options(-rtlib=compiler-rt) + add_link_options(-rtlib=compiler-rt) +endif() + list(PREPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMake" "${PROJECT_SOURCE_DIR}/CMake/third-party" ) diff --git a/bolt/duckdb/conversion/tests/DuckParserTest.cpp b/bolt/duckdb/conversion/tests/DuckParserTest.cpp index 5435a5c05..5a8db04eb 100644 --- a/bolt/duckdb/conversion/tests/DuckParserTest.cpp +++ b/bolt/duckdb/conversion/tests/DuckParserTest.cpp @@ -37,8 +37,8 @@ using namespace bytedance::bolt::duckdb; namespace { std::shared_ptr parseExpr(const std::string& exprString) { - ParseOptions options; - return parseExpr(exprString, options); + duckdb::ParseOptions options; + return duckdb::parseExpr(exprString, options); } } // namespace @@ -297,30 +297,33 @@ TEST(DuckParserTest, between) { TEST(DuckParserTest, interval) { auto parseInterval = [](const std::string& sql) { - auto expr = - std::dynamic_pointer_cast(parseExpr(sql)); - BOLT_CHECK_NOT_NULL(expr); - - auto value = - INTERVAL_DAY_TIME()->valueToString(expr->value().value()); - if (expr->alias()) { - return fmt::format("{} AS {}", value, 
expr->alias().value()); - } - - return value; + auto parsed = parseExpr(sql); + return parsed->toString(); }; - EXPECT_EQ("0 05:00:00.000", parseInterval("INTERVAL 5 HOURS")); - EXPECT_EQ("0 00:36:00.000", parseInterval("INTERVAL 36 MINUTES")); - EXPECT_EQ("0 00:00:07.000", parseInterval("INTERVAL 7 SECONDS")); - EXPECT_EQ("0 00:00:00.123", parseInterval("INTERVAL 123 MILLISECONDS")); - - EXPECT_EQ("0 00:00:12.345", parseInterval("INTERVAL 12345 MILLISECONDS")); - EXPECT_EQ("0 03:25:45.678", parseInterval("INTERVAL 12345678 MILLISECONDS")); - EXPECT_EQ("1 03:48:20.100", parseInterval("INTERVAL 100100100 MILLISECONDS")); + EXPECT_EQ( + "to_hours(cast(trunc(cast(5, DOUBLE)), BIGINT))", + parseInterval("INTERVAL 5 HOURS")); + EXPECT_EQ( + "to_minutes(cast(trunc(cast(36, DOUBLE)), BIGINT))", + parseInterval("INTERVAL 36 MINUTES")); + EXPECT_EQ("to_seconds(cast(7, DOUBLE))", parseInterval("INTERVAL 7 SECONDS")); + EXPECT_EQ( + "to_milliseconds(cast(123, DOUBLE))", + parseInterval("INTERVAL 123 MILLISECONDS")); + EXPECT_EQ( + "to_milliseconds(cast(12345, DOUBLE))", + parseInterval("INTERVAL 12345 MILLISECONDS")); + EXPECT_EQ( + "to_milliseconds(cast(12345678, DOUBLE))", + parseInterval("INTERVAL 12345678 MILLISECONDS")); + EXPECT_EQ( + "to_milliseconds(cast(100100100, DOUBLE))", + parseInterval("INTERVAL 100100100 MILLISECONDS")); EXPECT_EQ( - "0 00:00:00.011 AS x", parseInterval("INTERVAL 11 MILLISECONDS AS x")); + "to_milliseconds(cast(11, DOUBLE)) AS x", + parseInterval("INTERVAL 11 MILLISECONDS AS x")); } TEST(DuckParserTest, cast) { diff --git a/bolt/dwio/dwrf/test/ColumnWriterTest.cpp b/bolt/dwio/dwrf/test/ColumnWriterTest.cpp index 0dfffc35c..6cb53d25a 100644 --- a/bolt/dwio/dwrf/test/ColumnWriterTest.cpp +++ b/bolt/dwio/dwrf/test/ColumnWriterTest.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "bolt/common/memory/Memory.h" #include "bolt/dwio/common/IntDecoder.h" @@ -47,6 +48,7 @@ #include "bolt/dwio/dwrf/writer/Writer.h" #include 
"bolt/type/Type.h" #include "bolt/vector/DictionaryVector.h" +#include "bolt/vector/SelectivityVector.h" #include "bolt/vector/tests/utils/VectorMaker.h" using namespace ::testing; @@ -58,6 +60,30 @@ using namespace bytedance::bolt::memory; using folly::Random; namespace bytedance::bolt::dwrf { +template +struct IsArrayType : std::false_type {}; + +template +struct IsArrayType> : std::true_type {}; + +template +struct IsMapType : std::false_type {}; + +template +struct IsMapType> : std::true_type {}; + +template +struct IsRowType : std::false_type {}; + +template +struct IsRowType> : std::true_type {}; + +template +struct IsComplexType + : std::bool_constant< + IsArrayType::value || IsMapType::value || IsRowType::value> { +}; + class MockStrideIndexProvider : public StrideIndexProvider { public: MOCK_CONST_METHOD0(getStrideIndex, uint64_t()); @@ -868,10 +894,16 @@ void mapToStruct( // initialize children of batch size filled with nulls VectorMaker maker{&pool}; for (auto column = 0; column < uniqueKeys.size(); column++) { - childrenVectors[column] = - maker.allNullFlatVector(origBatch->size()); - // only flat for scalar types - // create function to handle nested complex types + if constexpr (IsComplexType::value) { + auto valueType = CppToType::create(); + auto child = BaseVector::create(valueType, origBatch->size(), &pool); + SelectivityVector nullRows(origBatch->size(), true); + child->addNulls(nullRows); + childrenVectors[column] = child; + } else { + childrenVectors[column] = + maker.allNullFlatVector(origBatch->size()); + } } batches[i] = maker.rowVector(childrenVectors); auto batchStruct = std::dynamic_pointer_cast(batches[i]); @@ -883,26 +915,47 @@ void mapToStruct( auto flatKeys = std::dynamic_pointer_cast>(keys); ASSERT_TRUE(flatKeys); auto values = mapBatch->mapValues(); - auto flatValues = std::dynamic_pointer_cast>(values); - ASSERT_TRUE(flatValues); - - auto offsets = mapBatch->offsets()->as(); - auto sizes = mapBatch->sizes()->as(); - - // for each 
row in current batch - for (vector_size_t row = 0; row < mapBatch->size(); row++) { - // for each key in row (single map) - for (vector_size_t index = offsets[row], - endOffset = offsets[row] + sizes[row]; - index < endOffset; - index++) { - ASSERT_FALSE(flatKeys->isNullAt(index)); - // set value in correct row - auto key = flatKeys->valueAt(index); - auto element = std::dynamic_pointer_cast>( - batchStruct->childAt(keyColIndex[key])); - ASSERT_TRUE(element); - element->set(row, flatValues->valueAt(index)); + if constexpr (IsComplexType::value) { + auto offsets = mapBatch->offsets()->as(); + auto sizes = mapBatch->sizes()->as(); + + // for each row in current batch + for (vector_size_t row = 0; row < mapBatch->size(); row++) { + // for each key in row (single map) + for (vector_size_t index = offsets[row], + endOffset = offsets[row] + sizes[row]; + index < endOffset; + index++) { + ASSERT_FALSE(flatKeys->isNullAt(index)); + // set value in correct row + auto key = flatKeys->valueAt(index); + auto element = batchStruct->childAt(keyColIndex[key]); + ASSERT_TRUE(element); + element->copy(values.get(), row, index, 1); + } + } + } else { + auto flatValues = std::dynamic_pointer_cast>(values); + ASSERT_TRUE(flatValues); + + auto offsets = mapBatch->offsets()->as(); + auto sizes = mapBatch->sizes()->as(); + + // for each row in current batch + for (vector_size_t row = 0; row < mapBatch->size(); row++) { + // for each key in row (single map) + for (vector_size_t index = offsets[row], + endOffset = offsets[row] + sizes[row]; + index < endOffset; + index++) { + ASSERT_FALSE(flatKeys->isNullAt(index)); + // set value in correct row + auto key = flatKeys->valueAt(index); + auto element = std::dynamic_pointer_cast>( + batchStruct->childAt(keyColIndex[key])); + ASSERT_TRUE(element); + element->set(row, flatValues->valueAt(index)); + } } } } @@ -4405,10 +4458,12 @@ struct DictColumnWriterTestCase { VectorPtr dictionaryVector; VectorPtr flatVector; - if (complexRowType == nullptr) 
{ - flatVector = makeFlatVector(size, valueAt, isNullAt); - } else { + if constexpr (IsComplexType::value) { + BOLT_CHECK_NOT_NULL( + complexRowType, "Expected complex row type for complex vectors"); flatVector = makeComplexVectors(complexRowType, size, isNullAt); + } else { + flatVector = makeFlatVector(size, valueAt, isNullAt); } auto wrappedVector = BaseVector::wrapInDictionary( @@ -4428,11 +4483,8 @@ struct DictColumnWriterTestCase { context.initBuffer(); // complexVectorType will be nullptr if the vector is not complex. - bool isComplexType = std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_); - - auto complexVectorType = isComplexType ? rowType : nullptr; + auto complexVectorType = + IsComplexType::value ? rowType : std::shared_ptr(); auto batch = createDictionaryBatch(size_, valueAt, isNullAt, complexVectorType); diff --git a/bolt/exec/StreamingAggregation.cpp b/bolt/exec/StreamingAggregation.cpp index dc242f93c..d31170cf5 100644 --- a/bolt/exec/StreamingAggregation.cpp +++ b/bolt/exec/StreamingAggregation.cpp @@ -44,7 +44,7 @@ StreamingAggregation::StreamingAggregation( ? "PartialStreamingAggregation" : "StreamingAggregation"), outputBatchSize_{outputBatchRows()}, - groupNumberThreshold_{2 * outputBatchSize_}, + groupNumberThreshold_{static_cast(2 * outputBatchSize_)}, aggregationNode_{aggregationNode}, step_{aggregationNode->step()} { if (aggregationNode_->ignoreNullKeys()) { diff --git a/bolt/parse/DuckLogicalOperator.h b/bolt/parse/DuckLogicalOperator.h index 7b5e554b5..322c84b0c 100644 --- a/bolt/parse/DuckLogicalOperator.h +++ b/bolt/parse/DuckLogicalOperator.h @@ -57,6 +57,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include // @manual +#include // @manual namespace duckdb { @@ -106,24 +107,29 @@ class LogicalGet : public LogicalOperator { vector returned_types; //! The names of ALL columns that can be returned by the table function vector names; - //! 
Bound column IDs - vector column_ids; + //! Columns that are used outside the scan + vector projection_ids; //! Filters pushed down for table scan TableFilterSet table_filters; - + //! The set of input parameters for the table function + vector parameters; string GetName() const override; - string ParamsToString() const override; + InsertionOrderPreservingMap ParamsToString() const override; //! Returns the underlying table that is being scanned, or nullptr if there is //! none - TableCatalogEntry* GetTable() const; + optional_ptr GetTable() const; public: + const vector& GetColumnIds() const; vector GetColumnBindings() override; idx_t EstimateCardinality(ClientContext& context) override; protected: void ResolveTypes() override; + + private: + LogicalGet(); }; //! LogicalFilter represents a filter operation (e.g. WHERE or HAVING clause) @@ -133,8 +139,6 @@ class LogicalFilter : public LogicalOperator { LogicalFilter(); vector projection_map; - - public: vector GetColumnBindings() override; bool SplitPredicates() { @@ -191,7 +195,7 @@ class LogicalAggregate : public LogicalOperator { vector> group_stats; public: - string ParamsToString() const override; + InsertionOrderPreservingMap ParamsToString() const override; vector GetColumnBindings() override; diff --git a/bolt/parse/QueryPlanner.cpp b/bolt/parse/QueryPlanner.cpp index 3097b647a..0b80beab4 100644 --- a/bolt/parse/QueryPlanner.cpp +++ b/bolt/parse/QueryPlanner.cpp @@ -31,8 +31,10 @@ #include "bolt/parse/QueryPlanner.h" #include "bolt/duckdb/conversion/DuckConversion.h" #include "bolt/parse/DuckLogicalOperator.h" +#include "bolt/vector/VariantToVector.h" #include // @manual +#include // @manual #include // @manual #include // @manual #include // @manual @@ -140,23 +142,71 @@ PlanNodePtr toBoltPlan( std::vector sources, QueryContext& queryContext) { if (logicalGet.function.name == "unnest") { - BOLT_CHECK_EQ(1, sources.size()); + // DuckDB 1.1.3 represents UNNEST as a standalone LOGICAL_GET with + // parameters 
embedded in the LogicalGet. + BOLT_CHECK_EQ(0, sources.size()); + + BOLT_CHECK_EQ( + logicalGet.parameters.size(), + 1, + "UNNEST expects a single parameter, got {}", + logicalGet.parameters.size()); + + const auto& param = logicalGet.parameters[0]; + BOLT_CHECK( + param.type().id() == ::duckdb::LogicalTypeId::LIST, + "UNNEST parameter must be a LIST, got {}", + param.type().ToString()); + + const auto& listType = param.type(); + auto elementType = + duckdb::toBoltType(::duckdb::ListType::GetChildType(listType)); + auto arrayType = ARRAY(elementType); + + std::vector elements; + const auto& listValues = ::duckdb::ListValue::GetChildren(param); + elements.reserve(listValues.size()); + for (const auto& value : listValues) { + elements.emplace_back(duckdb::duckValueToVariant(value)); + } + + auto arrayVector = variantArrayToVector(arrayType, elements, pool); + auto rowType = ROW({queryContext.nextColumnName()}, {arrayType}); + auto rowVector = std::make_shared( + pool, rowType, nullptr, 1, std::vector{arrayVector}); + + std::vector vectors = {rowVector}; + auto valuesNode = + std::make_shared(queryContext.nextNodeId(), vectors); + + std::vector projections{ + std::make_shared( + valuesNode->outputType()->childAt(0), + valuesNode->outputType()->asRow().nameOf(0))}; + std::vector projectionNames{queryContext.nextColumnName()}; + auto projectNode = std::make_shared( + queryContext.nextNodeId(), + std::move(projectionNames), + std::move(projections), + std::move(valuesNode)); + return std::make_shared( queryContext.nextNodeId(), std::vector{}, // replicateVariables std::vector{ std::make_shared( - sources[0]->outputType()->childAt(0), - sources[0]->outputType()->asRow().nameOf(0))}, - std::vector{"a"}, + projectNode->outputType()->childAt(0), + projectNode->outputType()->asRow().nameOf(0))}, + std::vector{ + logicalGet.names.empty() ? 
"a" : logicalGet.names[0]}, std::nullopt, // ordinalityName - std::move(sources[0])); + std::move(projectNode)); } BOLT_CHECK_EQ(logicalGet.function.name, "seq_scan"); BOLT_CHECK_EQ(0, sources.size()); - const auto& columnIds = logicalGet.column_ids; + const auto& columnIds = logicalGet.GetColumnIds(); std::vector names(columnIds.size()); std::vector types(columnIds.size()); @@ -464,6 +514,18 @@ PlanNodePtr toBoltPlan( pool, std::move(sources), queryContext); + case ::duckdb::LogicalOperatorType::LOGICAL_UNNEST: + BOLT_CHECK_EQ(1, sources.size()); + return std::make_shared( + queryContext.nextNodeId(), + std::vector{}, // replicateVariables + std::vector{ + std::make_shared( + sources[0]->outputType()->childAt(0), + sources[0]->outputType()->asRow().nameOf(0))}, + std::vector{"a"}, + std::nullopt, // ordinalityName + std::move(sources[0])); default: BOLT_NYI( "Plan node is not supported yet: {}", @@ -478,11 +540,14 @@ static void customScalarFunction( BOLT_UNREACHABLE(); } -static ::duckdb::idx_t customAggregateState() { +::duckdb::idx_t customAggregateState( + const ::duckdb::AggregateFunction& /*function*/) { BOLT_UNREACHABLE(); } -static void customAggregateInitialize(::duckdb::data_ptr_t state) { +void customAggregateInitialize( + const ::duckdb::AggregateFunction& /*function*/, + ::duckdb::data_ptr_t /* state */) { BOLT_UNREACHABLE(); } diff --git a/conanfile.py b/conanfile.py index b51f1e6b7..aeba91373 100644 --- a/conanfile.py +++ b/conanfile.py @@ -275,9 +275,7 @@ def requirements(self): if self.settings.os in ["Linux", "FreeBSD"]: if self.options.get_safe("enable_perf"): self.requires("gperftools/2.16") - self.requires("libunwind/1.8.0", override=True) - else: - self.requires("libunwind/1.8.0") + self.requires("libunwind/1.8.3", override=True) self.requires("utf8proc/2.11.0", transitive_headers=True, transitive_libs=True) self.requires("date/3.0.4-bolt", transitive_headers=True, transitive_libs=True) self.requires("libbacktrace/cci.20210118") @@ -287,7 
+285,7 @@ def requirements(self): self.requires("paimon-cpp/0.0.3-bolt") if self.options.get_safe("enable_testutil"): self.requires("gtest/1.17.0", force=True) - self.requires("duckdb/0.8.1") + self.requires("duckdb/1.1.3") def build_requirements(self): self.tool_requires("m4/1.4.19") @@ -324,10 +322,13 @@ def configure(self): self.options[paimon_cpp].with_avro = True arrow_simd_level = "default" + llvm_targets = None if str(self.settings.arch) in ["x86", "x86_64"]: arrow_simd_level = "avx2" + llvm_targets = "X86" elif str(self.settings.arch) in ["armv8", "arm", "armv9"]: arrow_simd_level = "neon" + llvm_targets = "AArch64" self.options[arrow].parquet = True self.options[arrow].filesystem_layer = True self.options[arrow].simd_level = arrow_simd_level @@ -360,6 +361,9 @@ def configure(self): self.options[llvm_core].with_zstd = False self.options[llvm_core].with_ffi = False self.options[llvm_core].with_clang = True + if llvm_targets is None: + raise RuntimeError("Unsupported target for JIT feature") + self.options[llvm_core].targets = llvm_targets if self.options.get_safe("enable_hdfs") and self.options.get_safe( "use_arrow_hdfs" diff --git a/scripts/conan/patches/arrow.15.0.1-csv-support.patch b/scripts/conan/patches/arrow.15.0.1-csv-support.patch index afcacf375..917c0111c 100644 --- a/scripts/conan/patches/arrow.15.0.1-csv-support.patch +++ b/scripts/conan/patches/arrow.15.0.1-csv-support.patch @@ -356,7 +356,7 @@ index 24a7af89b..27655cdf4 100644 self.requires("grpc/1.50.0") if self._requires_rapidjson(): - self.requires("rapidjson/1.1.0") -+ self.requires("rapidjson/[>=cci.20230929]") ++ self.requires("rapidjson/cci.20250205") if self.options.with_llvm: self.requires("llvm-core/13.0.0") if self.options.with_openssl: diff --git a/scripts/run-clang-tidy.py b/scripts/run-clang-tidy.py index 83f9aadaf..610779b2d 100755 --- a/scripts/run-clang-tidy.py +++ b/scripts/run-clang-tidy.py @@ -318,7 +318,9 @@ def process_gha_output(stdout): def tidy(args): - extensions = 
(".cc", ".cpp", ".cxx", ".c", ".h", ".hpp", ".hxx") + source_extensions = (".cc", ".cpp", ".cxx", ".c") + header_extensions = (".h", ".hpp", ".hxx") + extensions = source_extensions + header_extensions candidate_files = [] # get file list to check if args.directory: @@ -476,9 +478,21 @@ def _compute_changed_lines() -> Optional[Multimap]: print("No changed C/C++ lines detected for clang-tidy.") return 0 + # Keep headers in --line-filter so diagnostics can be reported there, + # but run clang-tidy only on translation units that have compile DB entries. + all_changed_for_filter = list(files_to_process) + files_to_process = [ + f for f in files_to_process if f.endswith(source_extensions) + ] + if not files_to_process: + print( + "Only header changes detected; no source files to run clang-tidy on in this mode." + ) + return 0 + # Use absolute paths in --line-filter for better compatibility with compile DBs. final_map_abs = {} # type: Dict[str, List[List[int]]] - for f in files_to_process: + for f in all_changed_for_filter: abs_f = to_repo_abs(f, git_root) final_map_abs[abs_f] = changed_lines[f] line_filter_json = json.dumps(