Skip to content

Commit

Permalink
Merge pull request #106 from frasercrmck/vecz-debug-info
Browse files Browse the repository at this point in the history
[vecz] Do not vectorize llvm.debug.value intrinsics
  • Loading branch information
frasercrmck authored Aug 28, 2023
2 parents a5ac5d1 + 0e09cc3 commit 996135f
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 136 deletions.
97 changes: 9 additions & 88 deletions modules/compiler/vecz/source/transform/packetizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class Packetizer::Impl : public Packetizer {
Value *packetizeSubgroupBroadcast(Instruction *I);
/// @brief Packetize PHI node.
///
/// @param[in] PHI PHI Node to packetize.
/// @param[in] Phi PHI Node to packetize.
///
/// @return Packetized values.
ValuePacket packetizePHI(PHINode *Phi);
Expand All @@ -218,7 +218,7 @@ class Packetizer::Impl : public Packetizer {
/// @brief Packetize a subgroup scan.
///
/// @param[in] CI CallInst to packetize.
/// @param[in] SubgroupScanKind type of subgroup scan to packetized.
/// @param[in] Scan type of subgroup scan to packetized.
///
/// @return Packetized values.
ValuePacket packetizeSubgroupScan(CallInst *CI,
Expand All @@ -230,9 +230,9 @@ class Packetizer::Impl : public Packetizer {
///
/// @return Packetized values.
Result assign(Value *Scalar, Value *Vectorized);
/// @brief Packetize a load instruction.
/// @brief Vectorize an instruction.
///
/// @param[in] Load Instruction to packetize.
/// @param[in] Ins Instruction to packetize.
///
/// @return Packetized instruction.
Value *vectorizeInstruction(Instruction *Ins);
Expand Down Expand Up @@ -274,7 +274,7 @@ class Packetizer::Impl : public Packetizer {
ValuePacket packetizeBinaryOp(BinaryOperator *BinOp);
/// @brief Packetize a freeze instruction.
///
/// @param[in] FreezeInst Instruction to packetize.
/// @param[in] FreezeI Instruction to packetize.
///
/// @return Packetized instruction.
ValuePacket packetizeFreeze(FreezeInst *FreezeI);
Expand Down Expand Up @@ -340,9 +340,9 @@ class Packetizer::Impl : public Packetizer {
///
/// @return Packetized instruction.
ValuePacket packetizeInsertElement(InsertElementInst *InsertElement);
/// @brief Packetize an insert element instruction.
/// @brief Packetize an extract element instruction.
///
/// @param[in] InsertElement Instruction to packetize.
/// @param[in] ExtractElement Instruction to packetize.
///
/// @return Packetized instruction.
ValuePacket packetizeExtractElement(ExtractElementInst *ExtractElement);
Expand Down Expand Up @@ -2104,87 +2104,8 @@ ValuePacket Packetizer::Impl::packetizeMemOp(MemOp &op) {
return results;
}

void Packetizer::Impl::vectorizeDI(Instruction *Scalar, Value *Packet) {
auto *const LAM = LocalAsMetadata::getIfExists(Scalar);
if (!LAM) {
return;
}

auto *const MDV = MetadataAsValue::getIfExists(Scalar->getContext(), LAM);
if (!MDV) {
return;
}

DIBuilder DIB(*Scalar->getModule(), false);

// Find all the debug value intrinsics attached to scalar instruction
for (User *U : MDV->users()) {
DbgValueInst *const DVI = dyn_cast<DbgValueInst>(U);
if (!DVI) {
continue;
}

DILocalVariable *const DILocal = DVI->getVariable();
DIType *LocalType = dyn_cast<DIType>(DILocal->getType());

// Vector types need to be of a integral base type
while (!isa<DIBasicType>(LocalType)) {
if (DIDerivedType *DerivedType = dyn_cast<DIDerivedType>(LocalType)) {
LocalType = dyn_cast_or_null<DIType>(DerivedType->getBaseType());
} else if (DICompositeType *CompositeType =
dyn_cast<DICompositeType>(LocalType)) {
auto baseType = CompositeType->getBaseType();
LocalType = dyn_cast_or_null<DIType>(baseType);
} else {
// Error case:
// No other valid derived classes of DIType,
// however some might be added to LLVM in the future.
break;
}

if (!LocalType) {
break;
}
}

// Type is something complex like a struct which we can't handle
if (!LocalType) {
continue;
}

if (SimdWidth.isScalable()) {
continue;
}
// Create a new DI vector type with simd width
const unsigned int Width = SimdWidth.getFixedValue();
Metadata *const Subscript = DIB.getOrCreateSubrange(0, Width);
DINodeArray SubscriptArray = DIB.getOrCreateArray(Subscript);

const uint64_t Size = LocalType->getSizeInBits() * Width;
const uint64_t Align = LocalType->getAlignInBits() * Width;

DICompositeType *const VectorType =
DIB.createVectorType(Size, Align, LocalType, SubscriptArray);

// Replace DILocalVariable type with our new vectorized type
DILocal->replaceOperandWith(3, VectorType);

// New packetized instruction will point to the base of our vector type
auto DIExpr = DIB.createExpression();

// Create llvm.dbg.value() intrinsic for packetized instruction,
// but can't insert it before a phi node.
if (isa<PHINode>(Scalar)) {
DIB.insertDbgValueIntrinsic(Packet, DILocal, DIExpr, DVI->getDebugLoc(),
Scalar->getParent()->getFirstNonPHI());
} else {
DIB.insertDbgValueIntrinsic(Packet, DILocal, DIExpr, DVI->getDebugLoc(),
Scalar);
}
// Delete the old scalar debug intrinsic since the instruction
// it references will also be deleted.
IC.deleteInstructionLater(DVI);
}
void Packetizer::Impl::vectorizeDI(Instruction *, Value *) {
// FIXME: Reinstate support for vectorizing debug info
return;
}

Expand Down
14 changes: 8 additions & 6 deletions modules/compiler/vecz/test/lit/llvm/insert_element_debug_info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "spir64-unknown-unknown"

; Function Attrs: nounwind
; CHECK: define spir_kernel void @__vecz_v4_unaligned_load
define spir_kernel void @unaligned_load(i32 addrspace(1)* %in, i32 addrspace(1)* %offsets, i32 addrspace(1)* %out) #0 !dbg !7 {
entry:
%in.addr = alloca i32 addrspace(1)*, align 8
%offsets.addr = alloca i32 addrspace(1)*, align 8
%out.addr = alloca i32 addrspace(1)*, align 8
; CHECK: %tmp = alloca <16 x i32>, align 16
%tid = alloca i32, align 4
%tmp = alloca <3 x i32>, align 16
store i32 addrspace(1)* %in, i32 addrspace(1)** %in.addr, align 8
Expand All @@ -44,6 +45,12 @@ entry:
store i32 %conv, i32* %tid, align 4, !dbg !31
call void @llvm.dbg.declare(metadata <3 x i32>* %tmp, metadata !15, metadata !29), !dbg !32
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8, !dbg !32
; CHECK: %[[TMP_LD:.+]] = call <4 x i32> @__vecz_b_interleaved_load4_4_Dv4_ju3ptr(ptr nonnull %tmp)
; FIXME: This llvm.dbg.value marks a 'kill location' and denotes the
; termination of the previous value assigned to %tmp - we could probably do
; better here by manifesting a vectorized value?
; CHECK: call void @llvm.dbg.value(metadata i32 {{(poison|undef)}}, metadata !{{[0-9]+}},
; CHECK-SAME: metadata !DIExpression({{.*}})), !dbg !{{[0-9]+}}
%1 = load i32, i32* %tid, align 4, !dbg !32
%mul = mul nsw i32 3, %1, !dbg !32
%idx.ext = sext i32 %mul to i64, !dbg !32
Expand Down Expand Up @@ -135,8 +142,3 @@ attributes #3 = { nobuiltin }
!34 = !DILocation(line: 5, scope: !7)
!35 = !DILocation(line: 6, scope: !7)
!36 = !DILocation(line: 7, scope: !7)

; CHECK: define spir_kernel void @__vecz_v4_unaligned_load
; CHECK: %tmp = alloca <16 x i32>, align 16
; CHECK: %[[TMP_LD:.+]] = call <4 x i32> @__vecz_b_interleaved_load4_4_Dv4_ju3ptr(ptr nonnull %tmp)
; CHECK: call void @llvm.dbg.value(metadata <4 x i32> %[[TMP_LD]], metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
52 changes: 18 additions & 34 deletions modules/compiler/vecz/test/lit/llvm/packetization_debug_info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "spir64-unknown-unknown"

; Function Attrs: nounwind
; Vectorized kernel function
; CHECK: @__vecz_v[[WIDTH:[0-9]+]]_add({{.*}} !dbg [[VECZ_SUBPROG:![0-9]+]]
; Check that intrinsics for user variable locations are still present
define spir_kernel void @add(i32 addrspace(1)* %in1, i32 addrspace(1)* %in2, i32 addrspace(1)* %out) #0 !dbg !4 {
entry:
%in1.addr = alloca i32 addrspace(1)*, align 8
Expand All @@ -33,20 +35,34 @@ entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
store i32 addrspace(1)* %in1, i32 addrspace(1)** %in1.addr, align 8
; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %in1, metadata [[DI_IN1:![0-9]+]], metadata [[EXPR:!DIExpression()]]
; CHECK-SAME: !dbg [[PARAM_LOC:![0-9]+]]
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %in1.addr, metadata !11, metadata !29), !dbg !30
store i32 addrspace(1)* %in2, i32 addrspace(1)** %in2.addr, align 8
; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %in2, metadata [[DI_IN2:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[PARAM_LOC]]
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %in2.addr, metadata !12, metadata !29), !dbg !30
store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 8
; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %out, metadata [[DI_OUT:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[PARAM_LOC]]
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %out.addr, metadata !13, metadata !29), !dbg !30
; CHECK: call void @llvm.dbg.value(metadata i64 %call, metadata [[DI_TID:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[TID_LOC:![0-9]+]]
call void @llvm.dbg.declare(metadata i64* %tid, metadata !14, metadata !29), !dbg !31
%call = call i64 @__mux_get_global_id(i32 0) #3, !dbg !31
store i64 %call, i64* %tid, align 8, !dbg !31
; FIXME: We're dropping the llvm.dbg.declare/llvm.dbg.value for %a here - we
; could probably preserve it.
; CHECK-NOT: call void @llvm.dbg.value(
call void @llvm.dbg.declare(metadata i32* %a, metadata !19, metadata !29), !dbg !32
%0 = load i64, i64* %tid, align 8, !dbg !32
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %in1.addr, align 8, !dbg !32
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i64 %0, !dbg !32
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !32
store i32 %2, i32* %a, align 4, !dbg !32
; FIXME: We're dropping the llvm.dbg.declare/llvm.dbg.value for %a here - we
; could probably preserve it.
; CHECK-NOT: call void @llvm.dbg.value(
call void @llvm.dbg.declare(metadata i32* %b, metadata !20, metadata !29), !dbg !33
%3 = load i64, i64* %tid, align 8, !dbg !33
%4 = load i32 addrspace(1)*, i32 addrspace(1)** %in2.addr, align 8, !dbg !33
Expand Down Expand Up @@ -115,27 +131,6 @@ attributes #3 = { nobuiltin }
!34 = !DILocation(line: 7, scope: !4)
!35 = !DILocation(line: 8, scope: !4)

; Vectorized kernel function
; CHECK: @__vecz_v[[WIDTH:[0-9]+]]_add({{.*}} !dbg [[VECZ_SUBPROG:![0-9]+]]

; Check that intrinsics for user variable locations are still present
; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %in1, metadata [[DI_IN1:![0-9]+]], metadata [[EXPR:!DIExpression()]]
; CHECK-SAME: !dbg [[PARAM_LOC:![0-9]+]]

; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %in2, metadata [[DI_IN2:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[PARAM_LOC]]

; CHECK: call void @llvm.dbg.value(metadata ptr addrspace(1) %out, metadata [[DI_OUT:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[PARAM_LOC]]

; CHECK: call void @llvm.dbg.value(metadata i64 %call, metadata [[DI_TID:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME: !dbg [[TID_LOC:![0-9]+]]

; CHECK: call void @llvm.dbg.value(metadata {{.*}}, metadata [[DI_A:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME:!dbg [[A_LOC:![0-9]+]]

; CHECK: call void @llvm.dbg.value(metadata {{.*}}, metadata [[DI_B:![0-9]+]], metadata [[EXPR]]
; CHECK-SAME:!dbg [[B_LOC:![0-9]+]]

; Debug info metadata entries
; CHECK:[[PTR_TYPE:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[DI_BASE:![0-9]+]], size: 64, align: 64)
Expand All @@ -144,7 +139,7 @@ attributes #3 = { nobuiltin }
; CHECK: [[VECZ_SUBPROG]] = distinct !DISubprogram(name: "add",
; CHECK-SAME: retainedNodes: [[VECZ_VARS:![0-9]+]]

; CHECK: [[VECZ_VARS]] = !{[[DI_IN1]], [[DI_IN2]], [[DI_OUT]], [[DI_TID]], [[DI_A]], [[DI_B]]}
; CHECK: [[VECZ_VARS]] = !{[[DI_IN1]], [[DI_IN2]], [[DI_OUT]], [[DI_TID]], [[DI_A:![0-9]+]], [[DI_B:![0-9]+]]}
; CHECK: [[DI_IN1]] = !DILocalVariable(name: "in1", arg: 1, scope: [[VECZ_SUBPROG]],
; CHECK-SAME:line: 1, type: [[PTR_TYPE]]
; CHECK: [[DI_IN2]] = !DILocalVariable(name: "in2", arg: 2, scope: [[VECZ_SUBPROG]],
Expand All @@ -154,14 +149,3 @@ attributes #3 = { nobuiltin }

; CHECK: [[DI_TID]] = !DILocalVariable(name: "tid", scope: [[VECZ_SUBPROG]]
; CHECK: [[DI_A]] = !DILocalVariable(name: "a", scope: [[VECZ_SUBPROG]],
; CHECK-SAME:line: 5, type: [[VECTOR_TYPE:![0-9]+]])

; Vectorized debug info type create in packetization pass
; CHECK: [[VECTOR_TYPE]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[DI_BASE]], size: {{[0-9]+}}, align: {{[0-9]+}}
; CHECK-SAME:flags: DIFlagVector, elements: ![[DI_ELEMS:[0-9]+]])
; CHECK:[[DI_ELEMS]] = !{[[DI_SUBRANGE:![0-9]+]]}
; LLVM 11 adds a lowerBound argument to DISubrange, so the optional check below
; CHECK: [[DI_SUBRANGE]] = !DISubrange(count: [[WIDTH]]{{(, lowerBound: [0-9])?}})

; CHECK: [[DI_B]] = !DILocalVariable(name: "b", scope: [[VECZ_SUBPROG]],
; CHECK-SAME: line: 6, type: [[VECTOR_TYPE]])
18 changes: 10 additions & 8 deletions modules/compiler/vecz/test/lit/llvm/phi_node_debug_info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
; Check that debug info intrinsics are correctly placed after
; phi nodes.

; RUN: veczc -k loop_phi -vecz-simd-width=4 -S < %s | FileCheck %s
; RUN: veczc -vecz-simd-width=4 -S < %s | FileCheck %s

; ModuleID = 'kernel.opencl'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "spir64-unknown-unknown"

; Function Attrs: nounwind
; CHECK: define spir_kernel void @__vecz_v4_loop_phi(
define spir_kernel void @loop_phi(i32 addrspace(3)* %a, i32 addrspace(3)* %b) #0 !dbg !4 {
entry:
%a.addr = alloca i32 addrspace(3)*, align 8
Expand All @@ -43,6 +44,13 @@ entry:
store i32 %conv, i32* %i, align 4, !dbg !33
br label %for.cond, !dbg !33


; CHECK: for.cond:
; CHECK: %[[PHI1:.+]] = phi {{i[0-9]+}} [ %{{.+}}, %entry ], [ %{{.+}}, %for.cond ]
; CHECK: call void @llvm.dbg.value(metadata i64 %[[PHI1]], metadata !{{[0-9]+}},
; CHECK-SAME: metadata !DIExpression({{.*}})), !dbg !{{[0-9]+}}
; Check we haven't inserted a llvm.dbg.value intrinsic before the last of the PHIs.
; CHECK-NOT: phi
for.cond: ; preds = %for.inc, %entry
%1 = load i32, i32* %i, align 4, !dbg !34
%cmp = icmp slt i32 %1, 128, !dbg !34
Expand All @@ -68,6 +76,7 @@ for.inc: ; preds = %for.body
br label %for.cond, !dbg !34

for.end: ; preds = %for.cond
; CHECK: ret void
ret void, !dbg !39
}

Expand Down Expand Up @@ -126,10 +135,3 @@ attributes #3 = { nobuiltin }
!37 = distinct !DILexicalBlock(scope: !35, file: !1, line: 4)
!38 = !DILocation(line: 6, scope: !37)
!39 = !DILocation(line: 7, scope: !4)

; CHECK: for.cond:
; CHECK: %[[PHI1:.+]] = phi <4 x [[TYPE:i[0-9]+]]> [ %{{.+}}, %entry ], [ %{{.+}}, %for.cond ]
; CHECK: call void @llvm.dbg.value(metadata <4 x [[TYPE]]> %[[PHI1]], metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
; CHECK-NOT: phi

; CHECK: ret void

0 comments on commit 996135f

Please sign in to comment.