Skip to content

Commit

Permalink
Merge pull request #108 from frasercrmck/clean-up-clbuiltininfo
Browse files Browse the repository at this point in the history
[compiler] Clean up CLBuiltinInfo
  • Loading branch information
frasercrmck authored Aug 28, 2023
2 parents 996135f + 52bee5b commit 22eff6c
Show file tree
Hide file tree
Showing 14 changed files with 74 additions and 201 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,6 @@ class CLBuiltinInfo : public BILangInfoConcept {
/// @see BuiltinInfo::emitBuiltinInline
llvm::Value *emitBuiltinInline(llvm::Function *Builtin, llvm::IRBuilder<> &B,
llvm::ArrayRef<llvm::Value *> Args) override;
/// @see BuiltinInfo::getBuiltinRange
std::optional<llvm::ConstantRange> getBuiltinRange(
llvm::CallInst &CI, std::array<std::optional<uint64_t>, 3> MaxLocalSizes,
std::array<std::optional<uint64_t>, 3> MaxGlobalSizes) const override;

/// @see BuiltinInfo::lowerBuiltinToMuxBuiltin
llvm::Instruction *lowerBuiltinToMuxBuiltin(llvm::CallInst &,
Expand Down
163 changes: 22 additions & 141 deletions modules/compiler/utils/source/cl_builtin_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@
#include <llvm/Support/MathExtras.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include <llvm/Transforms/Utils/ValueMapper.h>
#include <multi_llvm/creation_apis_helper.h>
#include <multi_llvm/multi_llvm.h>
#include <multi_llvm/opaque_pointers.h>
#include <multi_llvm/optional_helper.h>
#include <multi_llvm/triple.h>
#include <multi_llvm/vector_type_helper.h>

#include <cmath>
Expand Down Expand Up @@ -532,7 +528,7 @@ struct CLBuiltinEntry {
};

/// @brief Information about known OpenCL builtins.
static const CLBuiltinEntry Builtins[] = {
static constexpr CLBuiltinEntry Builtins[] = {
// Non-standard Builtin Functions
{eCLBuiltinConvertHalfToFloat, "convert_half_to_float"},
{eCLBuiltinConvertFloatToHalf, "convert_float_to_half"},
Expand Down Expand Up @@ -951,73 +947,9 @@ llvm::StringRef CLBuiltinInfo::getBuiltinName(BuiltinID ID) const {
return llvm::StringRef();
}

BuiltinUniformity CLBuiltinInfo::isBuiltinUniform(Builtin const &B,
BuiltinUniformity CLBuiltinInfo::isBuiltinUniform(Builtin const &,
const CallInst *CI,
unsigned SimdDimIdx) const {
ConstantInt *Rank = nullptr;
switch (B.ID) {
default:
break;
case eCLBuiltinGetWorkDim:
case eCLBuiltinGetGroupId:
case eCLBuiltinGetGlobalSize:
case eCLBuiltinGetGlobalOffset:
case eCLBuiltinGetLocalSize:
case eCLBuiltinGetEnqueuedLocalSize:
case eCLBuiltinGetNumGroups:
return eBuiltinUniformityAlways;
case eCLBuiltinAsyncWorkGroupCopy:
case eCLBuiltinAsyncWorkGroupStridedCopy:
case eCLBuiltinWaitGroupEvents:
case eCLBuiltinAsyncWorkGroupCopy2D2D:
case eCLBuiltinAsyncWorkGroupCopy3D3D:
// These builtins will always be uniform within the same workgroup, as
// otherwise their behaviour is undefined. They might not be across
// workgroups, but we do not vectorize across workgroups anyway.
return eBuiltinUniformityAlways;
case eCLBuiltinGetGlobalId:
case eCLBuiltinGetLocalId:
// We need to know the rank of these builtins at compile time.
if (!CI || CI->arg_empty()) {
return eBuiltinUniformityNever;
}
Rank = dyn_cast<ConstantInt>(CI->getArgOperand(0));
if (!Rank) {
// The Rank is some function, which "might" evaluate to zero
// sometimes, so we let the packetizer sort it out with some
// conditional magic.
// TODO Make sure this can never go haywire in weird edge cases.
// Where we have one get_global_id() dependent on another, this is
// not packetized correctly. Doing so is very hard! We should
// probably just fail to packetize in this case. We might also be
// able to return eBuiltinUniformityNever here, in cases where we can
// prove that the value can never be zero.
return eBuiltinUniformityMaybeInstanceID;
}
// Only vectorize on selected dimension. The value of get_global_id with
// other ranks is uniform.
if (Rank->getZExtValue() == SimdDimIdx) {
return eBuiltinUniformityInstanceID;
} else {
return eBuiltinUniformityAlways;
}
case eCLBuiltinGetLocalLinearId:
case eCLBuiltinGetGlobalLinearId:
// TODO: This is fine for vectorizing in the x-axis, but currently we do
// not support vectorizing along y or z (see CA-2843).
return (SimdDimIdx) ? eBuiltinUniformityNever
: eBuiltinUniformityInstanceID;
case eCLBuiltinGetSubgroupLocalId:
return eBuiltinUniformityInstanceID;
case eCLBuiltinSubgroupAll:
case eCLBuiltinSubgroupAny:
case eCLBuiltinSubgroupReduceAdd:
case eCLBuiltinSubgroupReduceMax:
case eCLBuiltinSubgroupReduceMin:
case eCLBuiltinSubgroupBroadcast:
return eBuiltinUniformityAlways;
}

unsigned) const {
// Assume that builtins with side effects are varying.
if (Function *Callee = CI->getCalledFunction()) {
auto const Props = analyzeBuiltin(*Callee).properties;
Expand Down Expand Up @@ -1355,12 +1287,9 @@ Function *CLBuiltinInfo::getVectorEquivalent(Builtin const &B, unsigned Width,
if (OldPtrTy) {
if (auto *const PtrRetPointeeTy =
getPointerReturnPointeeTy(B.function, Props)) {
auto *OldPointeeTy = BuiltinPointeeTypes[i];
(void)OldPointeeTy;
assert(
OldPointeeTy && OldPointeeTy == PtrRetPointeeTy &&
multi_llvm::isOpaqueOrPointeeTypeMatches(OldPtrTy, OldPointeeTy) &&
"Demangling inconsistency");
[[maybe_unused]] auto *OldPointeeTy = BuiltinPointeeTypes[i];
assert(OldPointeeTy && OldPointeeTy == PtrRetPointeeTy &&
"Demangling inconsistency");
if (!FixedVectorType::isValidElementType(PtrRetPointeeTy)) {
return nullptr;
}
Expand Down Expand Up @@ -1479,12 +1408,9 @@ Function *CLBuiltinInfo::getScalarEquivalent(Builtin const &B, Module *M) {
Type *const PtrRetPointeeTy =
getPointerReturnPointeeTy(B.function, Props);
if (PtrRetPointeeTy && PtrRetPointeeTy->isVectorTy()) {
auto *OldPointeeTy = BuiltinPointeeTypes[i];
(void)OldPointeeTy;
assert(
OldPointeeTy && OldPointeeTy == PtrRetPointeeTy &&
multi_llvm::isOpaqueOrPointeeTypeMatches(OldPtrTy, OldPointeeTy) &&
"Demangling inconsistency");
[[maybe_unused]] auto *OldPointeeTy = BuiltinPointeeTypes[i];
assert(OldPointeeTy && OldPointeeTy == PtrRetPointeeTy &&
"Demangling inconsistency");
auto *OldVecTy = cast<FixedVectorType>(PtrRetPointeeTy);
Type *NewTy = PointerType::get(OldVecTy->getElementType(),
OldPtrTy->getAddressSpace());
Expand Down Expand Up @@ -1556,19 +1482,19 @@ Function *CLBuiltinInfo::getScalarEquivalent(Builtin const &B, Module *M) {
/// (assumed builtin) Function is known to possess the given qualifier.
/// @return true if the parameter is known to have the qualifier, false if not,
/// and None on error.
static multi_llvm::Optional<bool> paramHasTypeQual(const Function &F,
unsigned ParamIdx,
TypeQualifier Q) {
static std::optional<bool> paramHasTypeQual(const Function &F,
unsigned ParamIdx,
TypeQualifier Q) {
// Demangle the function name to get the type qualifiers.
SmallVector<Type *, 2> Types;
SmallVector<TypeQualifiers, 2> Quals;
NameMangler Mangler(&F.getContext());
if (Mangler.demangleName(F.getName(), Types, Quals).empty()) {
return multi_llvm::None;
return std::nullopt;
}

if (ParamIdx >= Quals.size()) {
return multi_llvm::None;
return std::nullopt;
}

auto &Qual = Quals[ParamIdx];
Expand Down Expand Up @@ -1606,7 +1532,7 @@ Value *CLBuiltinInfo::emitBuiltinInline(Function *F, IRBuilder<> &B,
// 6.12.3 Integer Functions
case eCLBuiltinAddSat:
case eCLBuiltinSubSat: {
multi_llvm::Optional<bool> IsParamSignedOrNone =
std::optional<bool> IsParamSignedOrNone =
paramHasTypeQual(*F, 0, eTypeQualSignedInt);
if (!IsParamSignedOrNone.has_value()) {
return nullptr;
Expand Down Expand Up @@ -2724,51 +2650,6 @@ Value *CLBuiltinInfo::emitBuiltinInlinePrintf(BuiltinID, IRBuilder<> &B,
return CreateBuiltinCall(B, Printf, Args);
}

std::optional<ConstantRange> CLBuiltinInfo::getBuiltinRange(
CallInst &CI, std::array<std::optional<uint64_t>, 3> MaxLocalSizes,
std::array<std::optional<uint64_t>, 3> MaxGlobalSizes) const {
assert(CI.getCalledFunction() && CI.getType()->isIntegerTy() &&
"Unexpected builtin");

BuiltinID BuiltinID = identifyBuiltin(*CI.getCalledFunction());

auto Bits = CI.getType()->getIntegerBitWidth();
// Assume we're indexing the global sizes array.
std::array<std::optional<uint64_t>, 3> *SizesPtr = &MaxGlobalSizes;

switch (BuiltinID) {
default:
return std::nullopt;
case eCLBuiltinGetWorkDim:
return ConstantRange::getNonEmpty(APInt(Bits, 1), APInt(Bits, 4));
case eCLBuiltinGetLocalId:
case eCLBuiltinGetLocalSize:
case eCLBuiltinGetEnqueuedLocalSize:
// Use the local sizes array, and fall through to common handling.
SizesPtr = &MaxLocalSizes;
LLVM_FALLTHROUGH;
case eCLBuiltinGetGlobalSize: {
auto *DimIdx = CI.getOperand(0);
if (!isa<ConstantInt>(DimIdx)) {
return std::nullopt;
}
uint64_t DimVal = cast<ConstantInt>(DimIdx)->getZExtValue();
if (DimVal >= SizesPtr->size() || !(*SizesPtr)[DimVal]) {
return std::nullopt;
}
// ID builtins range from [0,size) and size builtins from [1,size]. Thus
// offset the range by 1 at each low/high end when returning the range
// for a size builtin.
int const SizeAdjust = BuiltinID == eCLBuiltinGetLocalSize ||
BuiltinID == eCLBuiltinGetEnqueuedLocalSize ||
BuiltinID == eCLBuiltinGetGlobalSize;
return ConstantRange::getNonEmpty(
APInt(Bits, SizeAdjust),
APInt(Bits, *(*SizesPtr)[DimVal] + SizeAdjust));
}
}
}

// Must be kept in sync with our OpenCL headers!
enum : uint32_t {
CLK_LOCAL_MEM_FENCE = 1,
Expand All @@ -2795,14 +2676,14 @@ enum : uint32_t {
memory_order_seq_cst = 4,
};

static multi_llvm::Optional<unsigned> parseMemFenceFlagsParam(Value *const P) {
static std::optional<unsigned> parseMemFenceFlagsParam(Value *const P) {
// Grab the 'flags' parameter.
if (auto *const Flags = dyn_cast<ConstantInt>(P)) {
// cl_mem_fence_flags is a bitfield and can be 0 or a combination of
// CLK_(GLOBAL|LOCAL|IMAGE)_MEM_FENCE values ORed together.
switch (Flags->getZExtValue()) {
case 0:
return multi_llvm::None;
return std::nullopt;
case CLK_LOCAL_MEM_FENCE:
return BIMuxInfoConcept::MemSemanticsWorkGroupMemory;
case CLK_GLOBAL_MEM_FENCE:
Expand All @@ -2812,10 +2693,10 @@ static multi_llvm::Optional<unsigned> parseMemFenceFlagsParam(Value *const P) {
BIMuxInfoConcept::MemSemanticsCrossWorkGroupMemory);
}
}
return multi_llvm::None;
return std::nullopt;
}

static multi_llvm::Optional<unsigned> parseMemoryScopeParam(Value *const P) {
static std::optional<unsigned> parseMemoryScopeParam(Value *const P) {
if (auto *const Scope = dyn_cast<ConstantInt>(P)) {
switch (Scope->getZExtValue()) {
case memory_scope_work_item:
Expand All @@ -2833,10 +2714,10 @@ static multi_llvm::Optional<unsigned> parseMemoryScopeParam(Value *const P) {
return BIMuxInfoConcept::MemScopeCrossDevice;
}
}
return multi_llvm::None;
return std::nullopt;
}

static multi_llvm::Optional<unsigned> parseMemoryOrderParam(Value *const P) {
static std::optional<unsigned> parseMemoryOrderParam(Value *const P) {
if (auto *const Order = dyn_cast<ConstantInt>(P)) {
switch (Order->getZExtValue()) {
case memory_order_relaxed:
Expand All @@ -2851,7 +2732,7 @@ static multi_llvm::Optional<unsigned> parseMemoryOrderParam(Value *const P) {
return BIMuxInfoConcept::MemSemanticsSequentiallyConsistent;
}
}
return multi_llvm::None;
return std::nullopt;
}

// This function returns a mux builtin ID for the corresponding CL builtin ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: veczc -k test -w 4 -S < %s | FileCheck %s
; RUN: veczc -w 4 -S < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir-unknown-unknown"

declare spir_func i32 @get_global_id(i32);
declare spir_func i32 @__mux_get_global_id(i32);

define spir_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %out2) {
entry:
%gid = call i32 @get_global_id(i32 0)
%gid = call i32 @__mux_get_global_id(i32 0)
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 3
store i32 %gid, i32 addrspace(1)* %arrayidx, align 4

Expand All @@ -35,7 +35,7 @@ entry:

; CHECK: define spir_kernel void @__vecz_v4_test
; CHECK-NEXT: entry:
; CHECK-NEXT: %gid = call i32 @get_global_id(i32 0)
; CHECK-NEXT: %gid = call i32 @__mux_get_global_id(i32 0)
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i32 3
; CHECK: store i32 %gid, ptr addrspace(1) %arrayidx, align 4
; CHECK: store <4 x ptr addrspace(1)> %{{.+}}, ptr addrspace(1) %{{.+}}
10 changes: 5 additions & 5 deletions modules/compiler/vecz/test/lit/llvm/diverging_loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: veczc -k test -w 4 -S < %s | FileCheck %s
; RUN: veczc -w 4 -S < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir-unknown-unknown"

declare spir_func i32 @get_local_id(i32);
declare spir_func i32 @get_local_size(i32);
declare i32 @__mux_get_local_id(i32);
declare i32 @__mux_get_local_size(i32);

define spir_kernel void @test(i32 addrspace(1)* %in) {
entry:
%id = call i32 @get_local_id(i32 0)
%size = call i32 @get_local_size(i32 0)
%id = call i32 @__mux_get_local_id(i32 0)
%size = call i32 @__mux_get_local_size(i32 0)
br label %loop

loop:
Expand Down
10 changes: 5 additions & 5 deletions modules/compiler/vecz/test/lit/llvm/diverging_nested_loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: veczc -k test -w 4 -S < %s | FileCheck %s
; RUN: veczc -w 4 -S < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir-unknown-unknown"

declare spir_func i32 @get_local_id(i32);
declare spir_func i32 @get_local_size(i32);
declare i32 @__mux_get_local_id(i32);
declare i32 @__mux_get_local_size(i32);

define spir_kernel void @test(i32 addrspace(1)* %in) {
entry:
%id = call i32 @get_local_id(i32 0)
%size = call i32 @get_local_size(i32 0)
%id = call i32 @__mux_get_local_id(i32 0)
%size = call i32 @__mux_get_local_size(i32 0)
br label %loop

loop:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: veczc -k test -w 4 -S < %s | FileCheck %s
; RUN: veczc -w 4 -S < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
target triple = "spir-unknown-unknown"

declare spir_func i32 @get_local_id(i32);
declare spir_func i32 @get_global_id(i32);
declare spir_func i32 @__mux_get_local_id(i32);
declare spir_func i32 @__mux_get_global_id(i32);

define spir_kernel void @test(i32 addrspace(1)* %in) {
entry:
%lid = call i32 @get_local_id(i32 0)
%lid = call i32 @__mux_get_local_id(i32 0)
%cmp = icmp eq i32 %lid, 0
br i1 %cmp, label %if, label %merge

Expand All @@ -37,7 +37,7 @@ if:
merge:
%multi_load = load i32, i32 addrspace(1)* %in
%multi_add = add i32 %multi_load, 42
%gid = call i32 @get_global_id(i32 0)
%gid = call i32 @__mux_get_global_id(i32 0)
%slot = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %gid
store i32 %multi_add, i32 addrspace(1)* %slot

Expand Down
Loading

0 comments on commit 22eff6c

Please sign in to comment.