From 32d25c3986c8a2f97b9870d5183d2d050e8eeceb Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 28 Aug 2023 17:35:35 +0100 Subject: [PATCH 1/2] [compiler] Delete dead CLBuiltinInfo code These builtins have all been converted to mux builtins and, as such, the code is dead. This also updates some vecz tests which were testing the old CL builtins rather than the generic mux ones. --- .../include/compiler/utils/cl_builtin_info.h | 4 - .../compiler/utils/source/cl_builtin_info.cpp | 115 +----------------- .../lit/llvm/constant_address_with_uniform.ll | 8 +- .../vecz/test/lit/llvm/diverging_loop.ll | 10 +- .../test/lit/llvm/diverging_nested_loop.ll | 10 +- .../scalar_load_store_in_varying_branch.ll | 10 +- .../vecz/test/lit/llvm/scalar_splat.ll | 8 +- ...plat_after_load_store_in_varying_branch.ll | 11 +- .../llvm/scalar_splat_after_varying_branch.ll | 11 +- .../llvm/scalar_splat_in_varying_branch.ll | 12 +- .../lit/llvm/secretly_scalar_load_store.ll | 8 +- .../vecz/test/lit/llvm/uniform_loop.ll | 6 +- .../test/lit/llvm/uniform_loop_metadata.ll | 6 +- .../test/lit/llvm/unmangled_builtin_call.ll | 8 +- 14 files changed, 55 insertions(+), 172 deletions(-) diff --git a/modules/compiler/utils/include/compiler/utils/cl_builtin_info.h b/modules/compiler/utils/include/compiler/utils/cl_builtin_info.h index c93333f18..0e9fad7da 100644 --- a/modules/compiler/utils/include/compiler/utils/cl_builtin_info.h +++ b/modules/compiler/utils/include/compiler/utils/cl_builtin_info.h @@ -101,10 +101,6 @@ class CLBuiltinInfo : public BILangInfoConcept { /// @see BuiltinInfo::emitBuiltinInline llvm::Value *emitBuiltinInline(llvm::Function *Builtin, llvm::IRBuilder<> &B, llvm::ArrayRef Args) override; - /// @see BuiltinInfo::getBuiltinRange - std::optional getBuiltinRange( - llvm::CallInst &CI, std::array, 3> MaxLocalSizes, - std::array, 3> MaxGlobalSizes) const override; /// @see BuiltinInfo::lowerBuiltinToMuxBuiltin llvm::Instruction *lowerBuiltinToMuxBuiltin(llvm::CallInst &, diff --git a/modules/compiler/utils/source/cl_builtin_info.cpp b/modules/compiler/utils/source/cl_builtin_info.cpp index a0c14bdb4..1fa7ac660 100644 --- a/modules/compiler/utils/source/cl_builtin_info.cpp +++ b/modules/compiler/utils/source/cl_builtin_info.cpp @@ -532,7 +532,7 @@ struct CLBuiltinEntry { }; /// @brief Information about known OpenCL builtins. -static const CLBuiltinEntry Builtins[] = { +static constexpr CLBuiltinEntry Builtins[] = { // Non-standard Builtin Functions {eCLBuiltinConvertHalfToFloat, "convert_half_to_float"}, {eCLBuiltinConvertFloatToHalf, "convert_float_to_half"}, @@ -951,73 +951,9 @@ llvm::StringRef CLBuiltinInfo::getBuiltinName(BuiltinID ID) const { return llvm::StringRef(); } -BuiltinUniformity CLBuiltinInfo::isBuiltinUniform(Builtin const &B, +BuiltinUniformity CLBuiltinInfo::isBuiltinUniform(Builtin const &, const CallInst *CI, - unsigned SimdDimIdx) const { - ConstantInt *Rank = nullptr; - switch (B.ID) { - default: - break; - case eCLBuiltinGetWorkDim: - case eCLBuiltinGetGroupId: - case eCLBuiltinGetGlobalSize: - case eCLBuiltinGetGlobalOffset: - case eCLBuiltinGetLocalSize: - case eCLBuiltinGetEnqueuedLocalSize: - case eCLBuiltinGetNumGroups: - return eBuiltinUniformityAlways; - case eCLBuiltinAsyncWorkGroupCopy: - case eCLBuiltinAsyncWorkGroupStridedCopy: - case eCLBuiltinWaitGroupEvents: - case eCLBuiltinAsyncWorkGroupCopy2D2D: - case eCLBuiltinAsyncWorkGroupCopy3D3D: - // These builtins will always be uniform within the same workgroup, as - // otherwise their behaviour is undefined. They might not be across - // workgroups, but we do not vectorize across workgroups anyway. - return eBuiltinUniformityAlways; - case eCLBuiltinGetGlobalId: - case eCLBuiltinGetLocalId: - // We need to know the rank of these builtins at compile time. - if (!CI || CI->arg_empty()) { - return eBuiltinUniformityNever; - } - Rank = dyn_cast(CI->getArgOperand(0)); - if (!Rank) { - // The Rank is some function, which "might" evaluate to zero - // sometimes, so we let the packetizer sort it out with some - // conditional magic. - // TODO Make sure this can never go haywire in weird edge cases. - // Where we have one get_global_id() dependent on another, this is - // not packetized correctly. Doing so is very hard! We should - // probably just fail to packetize in this case. We might also be - // able to return eBuiltinUniformityNever here, in cases where we can - // prove that the value can never be zero. - return eBuiltinUniformityMaybeInstanceID; - } - // Only vectorize on selected dimension. The value of get_global_id with - // other ranks is uniform. - if (Rank->getZExtValue() == SimdDimIdx) { - return eBuiltinUniformityInstanceID; - } else { - return eBuiltinUniformityAlways; - } - case eCLBuiltinGetLocalLinearId: - case eCLBuiltinGetGlobalLinearId: - // TODO: This is fine for vectorizing in the x-axis, but currently we do - // not support vectorizing along y or z (see CA-2843). - return (SimdDimIdx) ? eBuiltinUniformityNever - : eBuiltinUniformityInstanceID; - case eCLBuiltinGetSubgroupLocalId: - return eBuiltinUniformityInstanceID; - case eCLBuiltinSubgroupAll: - case eCLBuiltinSubgroupAny: - case eCLBuiltinSubgroupReduceAdd: - case eCLBuiltinSubgroupReduceMax: - case eCLBuiltinSubgroupReduceMin: - case eCLBuiltinSubgroupBroadcast: - return eBuiltinUniformityAlways; - } - + unsigned) const { // Assume that builtins with side effects are varying. if (Function *Callee = CI->getCalledFunction()) { auto const Props = analyzeBuiltin(*Callee).properties; @@ -2724,51 +2660,6 @@ Value *CLBuiltinInfo::emitBuiltinInlinePrintf(BuiltinID, IRBuilder<> &B, return CreateBuiltinCall(B, Printf, Args); } -std::optional CLBuiltinInfo::getBuiltinRange( - CallInst &CI, std::array, 3> MaxLocalSizes, - std::array, 3> MaxGlobalSizes) const { - assert(CI.getCalledFunction() && CI.getType()->isIntegerTy() && - "Unexpected builtin"); - - BuiltinID BuiltinID = identifyBuiltin(*CI.getCalledFunction()); - - auto Bits = CI.getType()->getIntegerBitWidth(); - // Assume we're indexing the global sizes array. - std::array, 3> *SizesPtr = &MaxGlobalSizes; - - switch (BuiltinID) { - default: - return std::nullopt; - case eCLBuiltinGetWorkDim: - return ConstantRange::getNonEmpty(APInt(Bits, 1), APInt(Bits, 4)); - case eCLBuiltinGetLocalId: - case eCLBuiltinGetLocalSize: - case eCLBuiltinGetEnqueuedLocalSize: - // Use the local sizes array, and fall through to common handling. - SizesPtr = &MaxLocalSizes; - LLVM_FALLTHROUGH; - case eCLBuiltinGetGlobalSize: { - auto *DimIdx = CI.getOperand(0); - if (!isa(DimIdx)) { - return std::nullopt; - } - uint64_t DimVal = cast(DimIdx)->getZExtValue(); - if (DimVal >= SizesPtr->size() || !(*SizesPtr)[DimVal]) { - return std::nullopt; - } - // ID builtins range from [0,size) and size builtins from [1,size]. Thus - // offset the range by 1 at each low/high end when returning the range - // for a size builtin. - int const SizeAdjust = BuiltinID == eCLBuiltinGetLocalSize || - BuiltinID == eCLBuiltinGetEnqueuedLocalSize || - BuiltinID == eCLBuiltinGetGlobalSize; - return ConstantRange::getNonEmpty( - APInt(Bits, SizeAdjust), - APInt(Bits, *(*SizesPtr)[DimVal] + SizeAdjust)); - } - } -} - // Must be kept in sync with our OpenCL headers! enum : uint32_t { CLK_LOCAL_MEM_FENCE = 1, diff --git a/modules/compiler/vecz/test/lit/llvm/constant_address_with_uniform.ll b/modules/compiler/vecz/test/lit/llvm/constant_address_with_uniform.ll index 74e35d421..d2ff89e2e 100644 --- a/modules/compiler/vecz/test/lit/llvm/constant_address_with_uniform.ll +++ b/modules/compiler/vecz/test/lit/llvm/constant_address_with_uniform.ll @@ -14,16 +14,16 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_global_id(i32); +declare spir_func i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %out2) { entry: - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 3 store i32 %gid, i32 addrspace(1)* %arrayidx, align 4 @@ -35,7 +35,7 @@ entry: ; CHECK: define spir_kernel void @__vecz_v4_test ; CHECK-NEXT: entry: -; CHECK-NEXT: %gid = call i32 @get_global_id(i32 0) +; CHECK-NEXT: %gid = call i32 @__mux_get_global_id(i32 0) ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i32 3 ; CHECK: store i32 %gid, ptr addrspace(1) %arrayidx, align 4 ; CHECK: store <4 x ptr addrspace(1)> %{{.+}}, ptr addrspace(1) %{{.+}} diff --git a/modules/compiler/vecz/test/lit/llvm/diverging_loop.ll b/modules/compiler/vecz/test/lit/llvm/diverging_loop.ll index a8c7d4edf..157e28cb1 100644 --- a/modules/compiler/vecz/test/lit/llvm/diverging_loop.ll +++ b/modules/compiler/vecz/test/lit/llvm/diverging_loop.ll @@ -14,18 +14,18 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_local_size(i32); +declare i32 @__mux_get_local_id(i32); +declare i32 @__mux_get_local_size(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %id = call i32 @get_local_id(i32 0) - %size = call i32 @get_local_size(i32 0) + %id = call i32 @__mux_get_local_id(i32 0) + %size = call i32 @__mux_get_local_size(i32 0) br label %loop loop: diff --git a/modules/compiler/vecz/test/lit/llvm/diverging_nested_loop.ll b/modules/compiler/vecz/test/lit/llvm/diverging_nested_loop.ll index 486d6c200..5abfe81e2 100644 --- a/modules/compiler/vecz/test/lit/llvm/diverging_nested_loop.ll +++ b/modules/compiler/vecz/test/lit/llvm/diverging_nested_loop.ll @@ -14,18 +14,18 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_local_size(i32); +declare i32 @__mux_get_local_id(i32); +declare i32 @__mux_get_local_size(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %id = call i32 @get_local_id(i32 0) - %size = call i32 @get_local_size(i32 0) + %id = call i32 @__mux_get_local_id(i32 0) + %size = call i32 @__mux_get_local_size(i32 0) br label %loop loop: diff --git a/modules/compiler/vecz/test/lit/llvm/scalar_load_store_in_varying_branch.ll b/modules/compiler/vecz/test/lit/llvm/scalar_load_store_in_varying_branch.ll index ec972edbc..a850d6f99 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalar_load_store_in_varying_branch.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalar_load_store_in_varying_branch.ll @@ -14,17 +14,17 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_global_id(i32); +declare spir_func i32 @__mux_get_local_id(i32); +declare spir_func i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %lid = call i32 @get_local_id(i32 0) + %lid = call i32 @__mux_get_local_id(i32 0) %cmp = icmp eq i32 %lid, 0 br i1 %cmp, label %if, label %merge @@ -37,7 +37,7 @@ if: merge: %multi_load = load i32, i32 addrspace(1)* %in %multi_add = add i32 %multi_load, 42 - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid store i32 %multi_add, i32 addrspace(1)* %slot diff --git a/modules/compiler/vecz/test/lit/llvm/scalar_splat.ll b/modules/compiler/vecz/test/lit/llvm/scalar_splat.ll index c13008f42..39792aee4 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalar_splat.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalar_splat.ll @@ -14,19 +14,17 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func void @barrier(i32); -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_global_id(i32); +declare i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: %load = load i32, i32 addrspace(1)* %in - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid store i32 %load, i32 addrspace(1)* %slot diff --git a/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_load_store_in_varying_branch.ll b/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_load_store_in_varying_branch.ll index ba1f94c45..89ddc9091 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_load_store_in_varying_branch.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_load_store_in_varying_branch.ll @@ -14,18 +14,17 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func void @barrier(i32); -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_global_id(i32); +declare i32 @__mux_get_local_id(i32); +declare i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %lid = call i32 @get_local_id(i32 0) + %lid = call i32 @__mux_get_local_id(i32 0) %cmp = icmp eq i32 %lid, 0 br i1 %cmp, label %if, label %merge @@ -37,7 +36,7 @@ if: merge: %load = load i32, i32 addrspace(1)* %in - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid store i32 %load, i32 addrspace(1)* %slot diff --git a/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_varying_branch.ll b/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_varying_branch.ll index 09c2e0b36..be8fd26da 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_varying_branch.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalar_splat_after_varying_branch.ll @@ -14,18 +14,17 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func void @barrier(i32); -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_global_id(i32); +declare i32 @__mux_get_local_id(i32); +declare i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %lid = call i32 @get_local_id(i32 0) + %lid = call i32 @__mux_get_local_id(i32 0) %cmp = icmp eq i32 %lid, 0 br i1 %cmp, label %if, label %merge @@ -34,7 +33,7 @@ if: merge: %load = load i32, i32 addrspace(1)* %in - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid store i32 %load, i32 addrspace(1)* %slot diff --git a/modules/compiler/vecz/test/lit/llvm/scalar_splat_in_varying_branch.ll b/modules/compiler/vecz/test/lit/llvm/scalar_splat_in_varying_branch.ll index c8c424e74..43bf13e83 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalar_splat_in_varying_branch.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalar_splat_in_varying_branch.ll @@ -14,23 +14,23 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_id(i32); -declare spir_func i32 @get_global_id(i32); +declare i32 @__mux_get_local_id(i32); +declare i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %lid = call i32 @get_local_id(i32 0) + %lid = call i32 @__mux_get_local_id(i32 0) %and = and i32 %lid, 1 %cmp = icmp eq i32 %and, 0 br i1 %cmp, label %if, label %merge if: - %lid1 = call i32 @get_local_id(i32 1) + %lid1 = call i32 @__mux_get_local_id(i32 1) %cmp1 = icmp eq i32 %lid1, 0 br i1 %cmp1, label %deeper_if, label %deeper_merge @@ -39,7 +39,7 @@ deeper_if: deeper_merge: %load = load i32, i32 addrspace(1)* %in - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid store i32 %load, i32 addrspace(1)* %slot br label %merge diff --git a/modules/compiler/vecz/test/lit/llvm/secretly_scalar_load_store.ll b/modules/compiler/vecz/test/lit/llvm/secretly_scalar_load_store.ll index 0986e4737..09135be13 100644 --- a/modules/compiler/vecz/test/lit/llvm/secretly_scalar_load_store.ll +++ b/modules/compiler/vecz/test/lit/llvm/secretly_scalar_load_store.ll @@ -14,23 +14,23 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_global_id(i32); +declare i32 @__mux_get_global_id(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %gid = call i32 @get_global_id(i32 0) + %gid = call i32 @__mux_get_global_id(i32 0) %and = and i32 %gid, 1 %cmp = icmp eq i32 %and, 0 br i1 %cmp, label %if, label %early_ret early_ret: ; just to prevent ROSCC from sticking its oar in - %gid1 = call i32 @get_global_id(i32 1) + %gid1 = call i32 @__mux_get_global_id(i32 1) ret void if: diff --git a/modules/compiler/vecz/test/lit/llvm/uniform_loop.ll b/modules/compiler/vecz/test/lit/llvm/uniform_loop.ll index 0b35a0f28..43c60eb88 100644 --- a/modules/compiler/vecz/test/lit/llvm/uniform_loop.ll +++ b/modules/compiler/vecz/test/lit/llvm/uniform_loop.ll @@ -14,16 +14,16 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_size(i32); +declare i32 @__mux_get_local_size(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %size = call i32 @get_local_size(i32 0) + %size = call i32 @__mux_get_local_size(i32 0) br label %loop loop: diff --git a/modules/compiler/vecz/test/lit/llvm/uniform_loop_metadata.ll b/modules/compiler/vecz/test/lit/llvm/uniform_loop_metadata.ll index 075ec2ea1..9b7640aa9 100644 --- a/modules/compiler/vecz/test/lit/llvm/uniform_loop_metadata.ll +++ b/modules/compiler/vecz/test/lit/llvm/uniform_loop_metadata.ll @@ -14,16 +14,16 @@ ; ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; RUN: veczc -k test -w 4 -S < %s | FileCheck %s +; RUN: veczc -w 4 -S < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir-unknown-unknown" -declare spir_func i32 @get_local_size(i32); +declare spir_func i32 @__mux_get_local_size(i32); define spir_kernel void @test(i32 addrspace(1)* %in) { entry: - %size = call i32 @get_local_size(i32 0) + %size = call i32 @__mux_get_local_size(i32 0) br label %loop loop: diff --git a/modules/compiler/vecz/test/lit/llvm/unmangled_builtin_call.ll b/modules/compiler/vecz/test/lit/llvm/unmangled_builtin_call.ll index f694914f5..a0973a053 100644 --- a/modules/compiler/vecz/test/lit/llvm/unmangled_builtin_call.ll +++ b/modules/compiler/vecz/test/lit/llvm/unmangled_builtin_call.ll @@ -23,7 +23,7 @@ target triple = "spir64-unknown-unknown" ; Function Attrs: nounwind uwtable define void @k_controlflow_loop_if(float* nocapture %out, float* nocapture readonly %in1, i32* nocapture readnone %in2) #0 { entry: - %call = tail call i64 @get_global_id(i32 0) #2 + %call = tail call i64 @__mux_get_global_id(i32 0) #2 %sext = shl i64 %call, 32 %idxprom = ashr exact i64 %sext, 32 %arrayidx = getelementptr inbounds float, float* %in1, i64 %idxprom @@ -35,7 +35,7 @@ entry: ret void } -declare i64 @get_global_id(i32) #1 +declare i64 @__mux_get_global_id(i32) #1 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } @@ -59,8 +59,8 @@ attributes #2 = { nobuiltin nounwind } ; The vectorized function ; CHECK: define void @__vecz_v[[WIDTH:[0-9]+]]_k_controlflow_loop_if( -; The unmangled get_global_id call -; CHECK: tail call i64 @get_global_id(i32 0) +; The unmangled __mux_get_global_id call +; CHECK: tail call i64 @__mux_get_global_id(i32 0) ; The vectorized loads and stores ; CHECK: load <4 x i32>, ptr %arrayidx, align 4 From 52bee5b3d7def0cc6d74cb270671d3188c384abd Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 28 Aug 2023 17:39:27 +0100 Subject: [PATCH 2/2] [compiler] Remove use of multi_llvm from CLBuiltinInfo There's no need for multi_llvm::Optional anymore, and all pointers are opaque. --- .../compiler/utils/source/cl_builtin_info.cpp | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/modules/compiler/utils/source/cl_builtin_info.cpp b/modules/compiler/utils/source/cl_builtin_info.cpp index 1fa7ac660..80eeb6872 100644 --- a/modules/compiler/utils/source/cl_builtin_info.cpp +++ b/modules/compiler/utils/source/cl_builtin_info.cpp @@ -30,11 +30,7 @@ #include #include #include -#include #include -#include -#include -#include #include #include @@ -1291,12 +1287,9 @@ Function *CLBuiltinInfo::getVectorEquivalent(Builtin const &B, unsigned Width, if (OldPtrTy) { if (auto *const PtrRetPointeeTy = getPointerReturnPointeeTy(B.function, Props)) { - auto *OldPointeeTy = BuiltinPointeeTypes[i]; - (void)OldPointeeTy; - assert( - OldPointeeTy && OldPointeeTy == PtrRetPointeeTy && - multi_llvm::isOpaqueOrPointeeTypeMatches(OldPtrTy, OldPointeeTy) && - "Demangling inconsistency"); + [[maybe_unused]] auto *OldPointeeTy = BuiltinPointeeTypes[i]; + assert(OldPointeeTy && OldPointeeTy == PtrRetPointeeTy && + "Demangling inconsistency"); if (!FixedVectorType::isValidElementType(PtrRetPointeeTy)) { return nullptr; } @@ -1415,12 +1408,9 @@ Function *CLBuiltinInfo::getScalarEquivalent(Builtin const &B, Module *M) { Type *const PtrRetPointeeTy = getPointerReturnPointeeTy(B.function, Props); if (PtrRetPointeeTy && PtrRetPointeeTy->isVectorTy()) { - auto *OldPointeeTy = BuiltinPointeeTypes[i]; - (void)OldPointeeTy; - assert( - OldPointeeTy && OldPointeeTy == PtrRetPointeeTy && - multi_llvm::isOpaqueOrPointeeTypeMatches(OldPtrTy, OldPointeeTy) && - "Demangling inconsistency"); + [[maybe_unused]] auto *OldPointeeTy = BuiltinPointeeTypes[i]; + assert(OldPointeeTy && OldPointeeTy == PtrRetPointeeTy && + "Demangling inconsistency"); auto *OldVecTy = cast(PtrRetPointeeTy); Type *NewTy = PointerType::get(OldVecTy->getElementType(), OldPtrTy->getAddressSpace()); @@ -1492,19 +1482,19 @@ Function *CLBuiltinInfo::getScalarEquivalent(Builtin const &B, Module *M) { /// (assumed builtin) Function is known to possess the given qualifier. /// @return true if the parameter is known to have the qualifier, false if not, /// and None on error. -static multi_llvm::Optional paramHasTypeQual(const Function &F, - unsigned ParamIdx, - TypeQualifier Q) { +static std::optional paramHasTypeQual(const Function &F, + unsigned ParamIdx, + TypeQualifier Q) { // Demangle the function name to get the type qualifiers. SmallVector Types; SmallVector Quals; NameMangler Mangler(&F.getContext()); if (Mangler.demangleName(F.getName(), Types, Quals).empty()) { - return multi_llvm::None; + return std::nullopt; } if (ParamIdx >= Quals.size()) { - return multi_llvm::None; + return std::nullopt; } auto &Qual = Quals[ParamIdx]; @@ -1542,7 +1532,7 @@ Value *CLBuiltinInfo::emitBuiltinInline(Function *F, IRBuilder<> &B, // 6.12.3 Integer Functions case eCLBuiltinAddSat: case eCLBuiltinSubSat: { - multi_llvm::Optional IsParamSignedOrNone = + std::optional IsParamSignedOrNone = paramHasTypeQual(*F, 0, eTypeQualSignedInt); if (!IsParamSignedOrNone.has_value()) { return nullptr; @@ -2686,14 +2676,14 @@ enum : uint32_t { memory_order_seq_cst = 4, }; -static multi_llvm::Optional parseMemFenceFlagsParam(Value *const P) { +static std::optional parseMemFenceFlagsParam(Value *const P) { // Grab the 'flags' parameter. if (auto *const Flags = dyn_cast(P)) { // cl_mem_fence_flags is a bitfield and can be 0 or a combination of // CLK_(GLOBAL|LOCAL|IMAGE)_MEM_FENCE values ORed together. switch (Flags->getZExtValue()) { case 0: - return multi_llvm::None; + return std::nullopt; case CLK_LOCAL_MEM_FENCE: return BIMuxInfoConcept::MemSemanticsWorkGroupMemory; case CLK_GLOBAL_MEM_FENCE: @@ -2703,10 +2693,10 @@ static multi_llvm::Optional parseMemFenceFlagsParam(Value *const P) { BIMuxInfoConcept::MemSemanticsCrossWorkGroupMemory); } } - return multi_llvm::None; + return std::nullopt; } -static multi_llvm::Optional parseMemoryScopeParam(Value *const P) { +static std::optional parseMemoryScopeParam(Value *const P) { if (auto *const Scope = dyn_cast(P)) { switch (Scope->getZExtValue()) { case memory_scope_work_item: @@ -2724,10 +2714,10 @@ static multi_llvm::Optional parseMemoryScopeParam(Value *const P) { return BIMuxInfoConcept::MemScopeCrossDevice; } } - return multi_llvm::None; + return std::nullopt; } -static multi_llvm::Optional parseMemoryOrderParam(Value *const P) { +static std::optional parseMemoryOrderParam(Value *const P) { if (auto *const Order = dyn_cast(P)) { switch (Order->getZExtValue()) { case memory_order_relaxed: @@ -2742,7 +2732,7 @@ static multi_llvm::Optional parseMemoryOrderParam(Value *const P) { return BIMuxInfoConcept::MemSemanticsSequentiallyConsistent; } } - return multi_llvm::None; + return std::nullopt; } // This function returns a mux builtin ID for the corresponding CL builtin ID