Skip to content

Commit

Permalink
Merge pull request #264 from frasercrmck/vecz-masked-atomics
Browse files Browse the repository at this point in the history
[vecz] Add support for masking atomic RMW instructions
  • Loading branch information
frasercrmck authored Dec 19, 2023
2 parents 51b4ae8 + f188fe5 commit 1d872df
Show file tree
Hide file tree
Showing 11 changed files with 695 additions and 160 deletions.
45 changes: 44 additions & 1 deletion modules/compiler/vecz/source/include/vectorization_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@

#include <llvm/ADT/DenseMap.h>
#include <llvm/Analysis/IVDescriptors.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/ValueHandle.h>
#include <llvm/Support/AtomicOrdering.h>
#include <llvm/Support/TypeSize.h>
#include <llvm/Transforms/Utils/ValueMapper.h>
#include <multi_llvm/multi_llvm.h>
Expand Down Expand Up @@ -150,14 +153,46 @@ class VectorizationContext {
/// @return The masked version of the function
llvm::Function *getOrCreateMaskedFunction(llvm::CallInst *CI);

/// @brief Describes an atomic RMW operation and the way it is vectorized.
///
/// An instance is filled in by whoever wants a masked atomic RMW builtin and
/// is handed to getOrCreateMaskedAtomicRMWFunction; the same description is
/// handed back by isMaskedAtomicRMWFunction for an existing builtin.
///
/// Every field has a well-defined default so that a default-constructed
/// instance never carries indeterminate values.
struct MaskedAtomicRMW {
  /// @brief Type of the pointer operand of the atomic operation.
  llvm::Type *PointerTy = nullptr;
  /// @brief Type of the value operand, which is also the result type of the
  /// atomic operation.
  llvm::Type *ValTy = nullptr;
  /// @brief The read-modify-write operation to perform. Defaults to BAD_BINOP
  /// so that an operation that was never set is recognizable.
  llvm::AtomicRMWInst::BinOp BinOp = llvm::AtomicRMWInst::BAD_BINOP;
  /// @brief Alignment of the atomic operation.
  llvm::Align Align;
  /// @brief Whether the atomic operation is volatile.
  bool IsVolatile = false;
  /// @brief Synchronization scope of the atomic operation.
  llvm::SyncScope::ID SyncScope = llvm::SyncScope::System;
  /// @brief Memory ordering of the atomic operation. Defaults to NotAtomic so
  /// that an ordering that was never set is recognizable.
  llvm::AtomicOrdering Ordering = llvm::AtomicOrdering::NotAtomic;
  // Vectorization info
  /// @brief Vectorization factor of the atomic operation.
  llvm::ElementCount VF;
  /// @brief Whether calls to the builtin carry a trailing vector length
  /// argument after the mask.
  bool IsVectorPredicated = false;
};

/// @brief Check if the given function is a masked version of an atomic RMW
/// operation.
///
/// The packetizer uses the returned description to request the equivalent
/// masked builtin for the packetized pointer and value types.
///
/// @param[in] F The function to check
/// @return A MaskedAtomicRMW instance detailing the atomic operation if the
/// function is a masked atomic RMW, or std::nullopt otherwise
std::optional<MaskedAtomicRMW> isMaskedAtomicRMWFunction(
const llvm::Function &F) const;
/// @brief Get (if it exists already) or create the function representing the
/// masked version of an atomic RMW operation.
///
/// Callers invoke the returned function with the pointer, the value and the
/// mask, in that order, followed by a vector length when
/// I.IsVectorPredicated is set.
///
/// @param[in] I Description of the atomic RMW operation to be masked
/// @param[in] Choices Choices to mangle into the function name
/// @param[in] VF The vectorization factor of the atomic operation
/// @return The masked version of the function. Callers treat a null result
/// as a failure.
llvm::Function *getOrCreateMaskedAtomicRMWFunction(
MaskedAtomicRMW &I, const VectorizationChoices &Choices,
llvm::ElementCount VF);

/// @brief Create a VectorizationUnit to use to vectorize the given scalar
/// function.
///
/// The lifetime of the returned VectorizationUnit is managed by the
/// VectorizationContext.
///
/// @param[in] F Function to vectorize.
/// @param[in] Width VF vectorization factor to use.
/// @param[in] VF vectorization factor to use.
/// @param[in] Dimension SIMD dimension to use (0 => x, 1 => y, 2 => z).
/// @param[in] Ch Vectorization Choices for the vectorization.
VectorizationUnit *createVectorizationUnit(llvm::Function &F,
Expand Down Expand Up @@ -258,6 +293,14 @@ class VectorizationContext {
bool emitSubgroupScanBody(llvm::Function &F, bool IsInclusive,
llvm::RecurKind OpKind, bool IsVP) const;

/// @brief Emit the body for a masked atomic builtin
///
/// @param[in] F The empty (declaration only) function to emit the body in
/// @param[in] MA The MaskedAtomicRMW information describing the atomic
/// operation the builtin performs
/// @return true on success, false otherwise
bool emitMaskedAtomicRMWBody(llvm::Function &F,
const MaskedAtomicRMW &MA) const;

/// @brief Helper for non-vectorization tasks.
TargetInfo &VTI;
/// @brief Module in which the vectorization happens.
Expand Down
16 changes: 15 additions & 1 deletion modules/compiler/vecz/source/include/vectorization_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,25 @@ class VectorizationChoices;
/// @param[in] ScalarName Name of the original function.
/// @param[in] VF vectorization factor of the vectorized function.
/// @param[in] Choices choices used for vectorization
/// @param[in] IsBuiltin True if this is an internal builtin.
///
/// @return Name for the vectorized function.
std::string getVectorizedFunctionName(llvm::StringRef ScalarName,
llvm::ElementCount VF,
VectorizationChoices Choices);
VectorizationChoices Choices,
bool IsBuiltin = false);

/// @brief Parses a name generated for a vectorized function.
///
/// @see getVectorizedFunctionName.
///
/// @param[in] Name Name of the vectorized function.
///
/// @return A tuple containing, in order, the original name of the function,
/// the element count and the choices it was encoded with. Returns
/// std::nullopt on failure.
std::optional<std::tuple<std::string, llvm::ElementCount, VectorizationChoices>>
decodeVectorizedFunctionName(llvm::StringRef Name);

/// @brief Clone the scalar function's body into the function to vectorize,
/// vectorizing function argument types where required.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@
#include <llvm/Analysis/ValueTracking.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/CFG.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Dominators.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Instructions.h>
#include <llvm/Support/Debug.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/TypeSize.h>
#include <llvm/Support/raw_ostream.h>

#include <queue>
Expand Down Expand Up @@ -211,6 +214,16 @@ class ControlFlowConversionState::Impl : public ControlFlowConversionState {
/// @return true if it is valid to mask this call, false otherwise
bool applyMaskToCall(CallInst *CI, Value *mask, DeletionMap &toDelete);

/// @brief Attempt to apply a mask to an AtomicRMW instruction via a builtin
/// call.
///
/// The instruction is replaced by a call to a masked atomic RMW builtin which
/// takes the pointer, the value and the mask as arguments.
///
/// @param[in] atomicI The atomic instruction to apply the mask to
/// @param[in] mask The mask to apply to the masked atomic
/// @param[out] toDelete mapping of deleted unmasked operations; receives the
/// original instruction paired with the call that replaces it
/// @return true if it is valid to mask this atomic, false otherwise (for
/// example when the masked builtin could not be obtained)
bool applyMaskToAtomicRMW(AtomicRMWInst &atomicI, Value *mask,
DeletionMap &toDelete);

/// @brief Linearize a CFG.
/// @return true if no problem occurred, false otherwise.
bool partiallyLinearizeCFG();
Expand Down Expand Up @@ -1124,9 +1137,12 @@ Error ControlFlowConversionState::Impl::applyMask(BasicBlock &BB, Value *mask) {
return makeStringError("Could not apply mask to call instruction", I);
}
} else if (I.isAtomic() && !isa<FenceInst>(&I)) {
// We need to apply masks to atomic functions, but it is currently not
// implemented. See CA-3294.
return makeStringError("Could not apply mask to atomic instruction", I);
// Turn atomics into calls to masked builtins if possible.
// FIXME: We don't yet support masked cmpxchg instructions.
if (auto *atomicI = dyn_cast<AtomicRMWInst>(&I);
!atomicI || !applyMaskToAtomicRMW(*atomicI, mask, toDelete)) {
return makeStringError("Could not apply mask to atomic instruction", I);
}
} else if (auto *branch = dyn_cast<BranchInst>(&I)) {
// We have to be careful with infinite loops, because if they exist on a
// divergent code path, they will always be entered and will hang the
Expand Down Expand Up @@ -1356,6 +1372,45 @@ bool ControlFlowConversionState::Impl::applyMaskToCall(CallInst *CI,
return true;
}

/// @brief Replace an atomic RMW instruction with a call to the masked builtin
/// that performs the same operation under the given mask.
///
/// @param[in] atomicI The atomic instruction to replace
/// @param[in] mask The mask passed to the builtin
/// @param[out] toDelete Receives the original instruction together with the
/// call that replaces it
/// @return true on success, false if the masked builtin or the call to it
/// could not be created
bool ControlFlowConversionState::Impl::applyMaskToAtomicRMW(
    AtomicRMWInst &atomicI, Value *mask, DeletionMap &toDelete) {
  LLVM_DEBUG(dbgs() << "vecz-cf: Now at AtomicRMWInst " << atomicI << "\n");

  Value *const pointerOp = atomicI.getPointerOperand();
  Value *const valueOp = atomicI.getValOperand();
  // The builtin is requested at a vectorization factor of one at this stage.
  auto const scalarVF = ElementCount::getFixed(1);

  // Gather everything that identifies the masked builtin we are after.
  VectorizationContext::MaskedAtomicRMW info;
  info.PointerTy = pointerOp->getType();
  info.ValTy = atomicI.getType();
  info.BinOp = atomicI.getOperation();
  info.Align = atomicI.getAlign();
  info.IsVolatile = atomicI.isVolatile();
  info.SyncScope = atomicI.getSyncScopeID();
  info.Ordering = atomicI.getOrdering();
  info.VF = scalarVF;
  info.IsVectorPredicated = VU.choices().vectorPredication();

  // Look up the masked builtin, creating it on first use.
  Function *const maskedFn =
      Ctx.getOrCreateMaskedAtomicRMWFunction(info, VU.choices(), scalarVF);
  VECZ_FAIL_IF(!maskedFn);

  SmallVector<Value *, 8> callArgs;
  callArgs.push_back(pointerOp);
  callArgs.push_back(valueOp);
  callArgs.push_back(mask);
  if (info.IsVectorPredicated) {
    // No vector length is available yet, so a placeholder of one is passed.
    auto *const i32Ty = IntegerType::getInt32Ty(atomicI.getContext());
    callArgs.push_back(ConstantInt::get(i32Ty, 1));
  }

  // Insert the call right before the atomic it stands in for.
  CallInst *const maskedCall =
      CallInst::Create(maskedFn, callArgs, "", &atomicI);
  VECZ_FAIL_IF(!maskedCall);

  // Redirect all users to the call; the atomic itself is only queued for
  // deletion here.
  atomicI.replaceAllUsesWith(maskedCall);
  toDelete.emplace_back(&atomicI, maskedCall);

  LLVM_DEBUG(dbgs() << "vecz-cf: Replaced " << atomicI << "\n");
  LLVM_DEBUG(dbgs() << "          with " << *maskedCall << "\n");

  return true;
}

bool ControlFlowConversionState::Impl::partiallyLinearizeCFG() {
// Two methods are possible to transform the divergent loops into uniform
// ones:
Expand Down
72 changes: 72 additions & 0 deletions modules/compiler/vecz/source/transform/packetizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "memory_operations.h"
#include "transform/instantiation_pass.h"
#include "transform/packetization_helpers.h"
#include "vectorization_context.h"
#include "vectorization_unit.h"
#include "vecz/vecz_choices.h"
#include "vecz/vecz_target_info.h"
Expand Down Expand Up @@ -301,6 +302,14 @@ class Packetizer::Impl : public Packetizer {
///
/// @return Packetized instruction.
ValuePacket packetizeMemOp(MemOp &Op);
/// @brief Packetize a masked atomic RMW operation.
///
/// @param[in] CI Masked atomic RMW builtin call to packetize.
/// @param[in] AtomicInfo Information about the masked atomic RMW. Taken by
/// value: the implementation overwrites its pointer and value types for each
/// packet element.
///
/// @return Packetized instruction: one call to a masked atomic RMW builtin
/// per packet element. An empty packet is returned when vector predication is
/// in use and the packet width is not 1.
ValuePacket packetizeMaskedAtomicRMW(
CallInst &CI, VectorizationContext::MaskedAtomicRMW AtomicInfo);
/// @brief Packetize a GEP instruction.
///
/// @param[in] GEP Instruction to packetize.
Expand Down Expand Up @@ -2093,6 +2102,9 @@ ValuePacket Packetizer::Impl::packetizeCall(CallInst *CI) {
return packetizeMemOp(*MaskedOp);
}
}
if (auto AtomicInfo = Ctx.isMaskedAtomicRMWFunction(*Callee)) {
return packetizeMaskedAtomicRMW(*CI, *AtomicInfo);
}
}

auto const Builtin = Ctx.builtins().analyzeBuiltin(*Callee);
Expand Down Expand Up @@ -2766,6 +2778,66 @@ ValuePacket Packetizer::Impl::packetizeMemOp(MemOp &op) {
return results;
}

/// @brief Packetize a call to a masked atomic RMW builtin.
///
/// The value, pointer and mask operands are packetized, and one call to the
/// masked builtin matching the packetized types is emitted per packet
/// element. The original call is scheduled for deletion.
///
/// @param[in] CI Masked atomic RMW builtin call to packetize.
/// @param[in] AtomicInfo Description of the atomic operation; its value and
/// pointer types are overwritten with the packetized types.
///
/// @return The packet of calls replacing CI.
ValuePacket Packetizer::Impl::packetizeMaskedAtomicRMW(
    CallInst &CI, VectorizationContext::MaskedAtomicRMW AtomicInfo) {
  ValuePacket results;

  // Operands of the builtin call: pointer, value, mask.
  Value *const scalarPtr = CI.getArgOperand(0);
  Value *const scalarVal = CI.getArgOperand(1);
  Value *const scalarMask = CI.getArgOperand(2);

  assert(AtomicInfo.ValTy == scalarVal->getType() && "AtomicInfo mismatch");
  auto const packetWidth = getPacketWidthForType(scalarVal->getType());

  // A vector length is only supported together with a packet width of one.
  if (packetWidth != 1 && VL) {
    emitVeczRemarkMissed(&F, &CI,
                         "Can not vector-predicate packets larger than 1");
    return {};
  }

  // Packetize the operands in the order value, pointer, mask.
  ValuePacket valueParts;
  Result packetizedVal = packetize(scalarVal);
  PACK_FAIL_IF(!packetizedVal);
  packetizedVal.getPacketValues(packetWidth, valueParts);
  PACK_FAIL_IF(valueParts.empty());

  ValuePacket pointerParts;
  Result packetizedPtr = packetize(scalarPtr);
  PACK_FAIL_IF(!packetizedPtr);
  packetizedPtr.getPacketValues(packetWidth, pointerParts);
  PACK_FAIL_IF(pointerParts.empty());

  ValuePacket maskParts;
  Result packetizedMask = packetize(scalarMask);
  PACK_FAIL_IF(!packetizedMask);
  packetizedMask.getPacketValues(packetWidth, maskParts);
  PACK_FAIL_IF(maskParts.empty());

  IRBuilder<> B(&CI);
  IC.deleteInstructionLater(&CI);

  for (unsigned part = 0; part != packetWidth; ++part) {
    auto *const partPtr = pointerParts[part];
    auto *const partVal = valueParts[part];

    // The builtin is selected by the packetized operand types.
    AtomicInfo.ValTy = partVal->getType();
    AtomicInfo.PointerTy = partPtr->getType();
    auto *maskedBuiltin =
        Ctx.getOrCreateMaskedAtomicRMWFunction(AtomicInfo, Choices, SimdWidth);
    PACK_FAIL_IF(!maskedBuiltin);

    SmallVector<Value *, 4> callArgs;
    callArgs.push_back(partPtr);
    callArgs.push_back(partVal);
    callArgs.push_back(maskParts[part]);
    if (AtomicInfo.IsVectorPredicated) {
      assert(VL && "Missing vector length");
      callArgs.push_back(VL);
    }

    auto *const maskedCall = B.CreateCall(maskedBuiltin, callArgs);
    results.push_back(maskedCall);
  }

  return results;
}

void Packetizer::Impl::vectorizeDI(Instruction *, Value *) {
// FIXME: Reinstate support for vectorizing debug info
return;
Expand Down
Loading

0 comments on commit 1d872df

Please sign in to comment.