diff --git a/modules/compiler/vecz/source/transform/control_flow_conversion_pass.cpp b/modules/compiler/vecz/source/transform/control_flow_conversion_pass.cpp index f7649b5b4..f3b0ef34a 100644 --- a/modules/compiler/vecz/source/transform/control_flow_conversion_pass.cpp +++ b/modules/compiler/vecz/source/transform/control_flow_conversion_pass.cpp @@ -1503,11 +1503,32 @@ bool ControlFlowConversionState::Impl::createBranchReductions() { auto *TI = BB.getTerminator(); if (BranchInst *Branch = dyn_cast(TI)) { if (Branch->isConditional()) { - auto *const cond = Branch->getCondition(); + auto *cond = Branch->getCondition(); if (isa(cond)) { continue; } + // On divergent paths, ensure that only active lanes contribute to a + // branch condition; merge the branch condition with the active lane + // mask. This ensures that disabled lanes don't spuriously contribute a + // 'true' value into the reduced branch condition. + // Note that the distinction between 'uniform' and 'divergent' isn't + // 100% sufficient for our purposes here, because even uniform values + // may read undefined/poison values when masked out. + // Don't perform this on uniform loops as those may be unconditionally + // entered even when no work-items are active. Masking the loop exit + // with the entry mask would mean that the loop never exits. + // FIXME: Is this missing incorrect branches in uniform blocks/loops? + // FIXME: This is pessimistic - some branch condition values are truly + // uniform even on masked-out lanes - but we don't have sophisticated + // enough analysis to discern true positives from false positives. + if (auto *LTag = DR->getTag(&BB).loop; + DR->isDivergent(BB) && (!LTag || LTag->isLoopDivergent())) { + cond = BinaryOperator::Create(Instruction::BinaryOps::And, cond, + MaskInfos[&BB].entryMask, + cond->getName() + "_active", Branch); + } + const auto &name = needsAllOfMask ? nameAll : nameAny; Function *const F = Ctx.getOrCreateInternalBuiltin( Twine(baseName).concat(name).str(), FT); diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge.ll index e8e6062d8..469725b7c 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge.ll @@ -122,6 +122,7 @@ if.else6: ; preds = %if.then6, %if.e ; CHECK: br i1 %[[CMP1]], label %[[IFTHEN:.+]], label %[[IFELSE:.+]] ; CHECK: [[IFTHEN]]: +; CHECK: %[[CMP2:.+]] = icmp ; CHECK: br i1 %{{.+}}, label %[[IFTHEN2UNIFORM:.+]], label %[[IFTHENBOSCCINDIR:.+]] ; CHECK: [[IFELSE2PREHEADERUNIFORM:.+]]: @@ -220,7 +221,9 @@ if.else6: ; preds = %if.then6, %if.e ; CHECK: [[IFTHEN2:.+]]: ; CHECK: %[[CMP3:.+]] = icmp -; CHECK: br i1 %[[CMP3]], label %[[IFTHEN3PREHEADER:.+]], label %[[IFELSE3PREHEADER:.+]] +; CHECK: %[[CMP3_ACTIVE:.+]] = and i1 %[[CMP3]], %[[CMP2]] +; CHECK: %[[CMP3_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP3_ACTIVE]]) +; CHECK: br i1 %[[CMP3_ACTIVE_ANY]], label %[[IFTHEN3PREHEADER:.+]], label %[[IFELSE3PREHEADER:.+]] ; CHECK: [[IFELSE3PREHEADER]]: ; CHECK: br label %[[IFELSE3]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge3.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge3.ll index ccd79ca20..3986db936 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge3.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/boscc_merge3.ll @@ -96,7 +96,9 @@ attributes #0 = { nounwind readnone } ; CHECK: if.then1: ; CHECK: %gep1 = -; CHECK: br i1 %cmp2, label %if.then2, label %if.end1 +; CHECK: %[[CMP2_ACTIVE:.+]] = and i1 %cmp2, %cmp1 +; CHECK: %[[CMP2_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP2_ACTIVE]]) +; CHECK: br i1 %[[CMP2_ACTIVE_ANY]], label %if.then2, label %if.end1 ; Generalizing the expected %cmp3 value because the 'icmp' could go off ; by one BB between LLVM versions. Therefore we can get %cmp3.not. diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization12.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization12.ll index eba24fbbd..763c8e960 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization12.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization12.ll @@ -667,7 +667,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFTHEN38]]: ; CHECK: %[[CMP42:cmp.+]] = icmp -; CHECK: br i1 %[[CMP42]], label %[[IFTHEN44:.+]], label %[[IFELSE46:.+]] +; CHECK: %[[CMP42_ACTIVE:.+]] = and i1 %[[CMP42]], {{%.*}} +; CHECK: %[[CMP42_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP42_ACTIVE]]) +; CHECK: br i1 %[[CMP42_ACTIVE_ANY]], label %[[IFTHEN44:.+]], label %[[IFELSE46:.+]] ; CHECK: [[IFTHEN44]]: ; CHECK: br label %[[IFELSE48:.+]] @@ -677,7 +679,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFELSE48]]: ; CHECK: %[[CMP49:.+]] = icmp -; CHECK: br i1 %[[CMP49]], label %[[IFTHEN51:.+]], label %[[IFELSE53:.+]] +; CHECK: %[[CMP49_ACTIVE:.+]] = and i1 %[[CMP49]], {{%.*}} +; CHECK: %[[CMP49_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP49_ACTIVE]]) +; CHECK: br i1 %[[CMP49_ACTIVE_ANY]], label %[[IFTHEN51:.+]], label %[[IFELSE53:.+]] ; CHECK: [[IFTHEN51]]: ; CHECK: br label %[[N58:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization13.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization13.ll index f7937ad89..7f61d8736 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization13.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization13.ll @@ -228,7 +228,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFTHEN4]]: ; CHECK: %[[TRUNC:.+]] = icmp -; CHECK: br i1 %[[TRUNC]], label %[[SWBB8:.+]], label %[[SWBB:.+]] +; CHECK: %[[TRUNC_ACTIVE:.+]] = and i1 %[[TRUNC]], {{%.*}} +; CHECK: %[[TRUNC_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[TRUNC_ACTIVE]]) +; CHECK: br i1 %[[TRUNC_ACTIVE_ANY]], label %[[SWBB8:.+]], label %[[SWBB:.+]] ; CHECK: [[SWBB]]: ; CHECK: br label %[[SWBB8]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization17.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization17.ll index 5e7b83b78..1c2450f3a 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization17.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization17.ll @@ -277,7 +277,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[FORCOND28PREHEADER:.+]], label %[[IFELSE:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[FORCOND28PREHEADER:.+]], label %[[IFELSE:.+]] ; CHECK: [[FORCOND28PREHEADER]]: ; CHECK: br label %[[WHILEBODYPUREEXIT:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization18.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization18.ll index 571c0a48f..fffcb6dbe 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization18.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization18.ll @@ -220,7 +220,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] ; CHECK: [[IFTHEN]]: ; CHECK: br label %[[WHILEBODYPUREEXIT:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization19.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization19.ll index 7b67cbd48..7faea031b 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization19.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization19.ll @@ -233,7 +233,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] ; CHECK: [[IFTHEN]]: ; CHECK: %[[CMP2:.+]] = icmp diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization5.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization5.ll index cb6acb6c7..ca2b671e2 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization5.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization5.ll @@ -227,7 +227,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFELSE5]]: ; CHECK: %[[CMP7:.+]] = icmp -; CHECK: br i1 %[[CMP7]], label %[[IFTHEN]], label %[[FORCOND14PREHEADER:.+]] +; CHECK: %[[CMP7_ACTIVE:.+]] = and i1 %[[CMP7]], {{%.*}} +; CHECK: %[[CMP7_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP7_ACTIVE]]) +; CHECK: br i1 %[[CMP7_ACTIVE_ANY]], label %[[IFTHEN]], label %[[FORCOND14PREHEADER:.+]] ; CHECK: [[FORCOND14PREHEADER]]: ; CHECK: br label %[[FORCOND14:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization6.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization6.ll index c5a8af2b8..bd2027413 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization6.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization6.ll @@ -168,7 +168,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[IFTHEN:.+]], label %[[IFELSE:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[IFTHEN:.+]], label %[[IFELSE:.+]] ; CHECK: [[IFTHEN]]: ; CHECK: %[[CMP2:.+]] = icmp diff --git a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization7.ll b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization7.ll index 1bbf53c65..4024f9bf5 100644 --- a/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization7.ll +++ b/modules/compiler/vecz/test/lit/llvm/Boscc/partial_linearization7.ll @@ -238,7 +238,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[E]]: ; CHECK: %[[CMP13:.+]] = icmp -; CHECK: br i1 %[[CMP13]], label %[[G:.+]], label %[[H:.+]] +; CHECK: %[[CMP13_ACTIVE:.+]] = and i1 %[[CMP13]], {{%.*}} +; CHECK: %[[CMP13_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP13_ACTIVE]]) +; CHECK: br i1 %[[CMP13_ACTIVE_ANY]], label %[[G:.+]], label %[[H:.+]] ; CHECK: [[G]]: ; CHECK: br label %[[FORCOND19:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/divergent_loop_bug.ll b/modules/compiler/vecz/test/lit/llvm/divergent_loop_bug.ll new file mode 100644 index 000000000..9eacfe58c --- /dev/null +++ b/modules/compiler/vecz/test/lit/llvm/divergent_loop_bug.ll @@ -0,0 +1,186 @@ +; Copyright (C) Codeplay Software Limited +; +; Licensed under the Apache License, Version 2.0 (the "License") with LLVM +; Exceptions; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +; License for the specific language governing permissions and limitations +; under the License. +; +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +; RUN: veczc -vecz-passes=cfg-convert -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +; CHECK: define spir_kernel void @__vecz_v4_uniform_if_then_in_divergent_block( +; CHECK-SAME: ptr addrspace(1) %accum_ptr, i32 %threshold, ptr addrspace(1) %out) +define spir_kernel void @uniform_if_then_in_divergent_block(ptr addrspace(1) %accum_ptr, i32 %threshold, ptr addrspace(1) %out) #4 !reqd_work_group_size !10 { +; CHECK: entry: +; CHECK: [[CMP_NOT:%.*]] = icmp slt i32 %0, %threshold +; CHECK: %cmp.not.ROSCC = icmp eq i1 [[CMP_NOT]], false +; CHECK: %cmp.not.ROSCC_any = call i1 @__vecz_b_divergence_any(i1 %cmp.not.ROSCC) +; CHECK: br i1 %cmp.not.ROSCC_any, label %entry.ROSCC, label %entry.if.end17_crit_edge +entry: + %cosa = alloca float, align 4 + %call = tail call i64 @__mux_get_global_id(i32 0) #5 + %sext = mul i64 %call, 51539607552 + %idx.ext = ashr exact i64 %sext, 32 + %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %accum_ptr, i64 %idx.ext + %0 = load i32, ptr addrspace(1) %add.ptr, align 4 + %cmp.not = icmp slt i32 %0, %threshold + br i1 %cmp.not, label %entry.if.end17_crit_edge, label %if.then + +; CHECK: entry.ROSCC: +; CHECK: [[CMP_NOT_NOT:%.*]] = xor i1 [[CMP_NOT]], true +; CHECK: br label %if.then + +entry.if.end17_crit_edge: ; preds = %entry + br label %if.end17 + +; Ensure that only active lanes (masked by %cmp.not.not) contribute towards the +; %or.cond branch. +; CHECK: if.then: +; CHECK: call void @__vecz_b_masked_store4_fu3ptrb(float 0.000000e+00, ptr %cosa, i1 [[CMP_NOT_NOT]]) +; CHECK: %1 = call spir_func float @__vecz_b_masked__Z6sincosfPf(float 0.000000e+00, ptr nonnull %cosa, i1 [[CMP_NOT_NOT]]) #9 +; CHECK: %2 = call float @__vecz_b_masked_load4_fu3ptrb(ptr %cosa, i1 [[CMP_NOT_NOT]]) +; CHECK: %mul7 = fmul float %2, -2.950000e+01 +; CHECK: %cmp11 = fcmp uge float %mul7, 0.000000e+00 +; CHECK: %cmp14 = fcmp ult float %mul7, 6.400000e+01 +; CHECK: %or.cond = and i1 %cmp11, %cmp14 +; CHECK: %or.cond_active = and i1 %or.cond, [[CMP_NOT_NOT]] +; CHECK: %or.cond_active_any = call i1 @__vecz_b_divergence_any(i1 %or.cond_active) +; CHECK: br i1 %or.cond_active_any, label %if.then.if.end_crit_edge, label %if.then16 +if.then: ; preds = %entry + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cosa) #6 + store float 0.000000e+00, ptr %cosa, align 4 + %call4 = call spir_func float @_Z6sincosfPf(float 0.000000e+00, ptr nonnull %cosa) #7 + %1 = load float, ptr %cosa, align 4 + %mul7 = fmul float %1, -2.950000e+01 + %cmp11 = fcmp uge float %mul7, 0.000000e+00 + %cmp14 = fcmp ult float %mul7, 6.400000e+01 + %or.cond = and i1 %cmp11, %cmp14 + br i1 %or.cond, label %if.then.if.end_crit_edge, label %if.then16 + +if.then.if.end_crit_edge: ; preds = %if.then + br label %if.end + +if.then16: ; preds = %if.then + %sext2 = shl i64 %call, 32 + %idxprom = ashr exact i64 %sext2, 32 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idxprom + store float %mul7, ptr addrspace(1) %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %if.then.if.end_crit_edge, %if.then16 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cosa) #6 + br label %if.end17 + +if.end17: ; preds = %entry.if.end17_crit_edge, %if.end + ret void +} + +define spir_kernel void @uniform_if_else_in_divergent_block(ptr addrspace(1) %accum_ptr, i32 %threshold, ptr addrspace(1) %out) #4 !reqd_work_group_size !10 { +; CHECK: entry: +; CHECK: [[CMP_NOT:%.*]] = icmp slt i32 %0, %threshold +; CHECK: %cmp.not.ROSCC = icmp eq i1 [[CMP_NOT]], false +; CHECK: %cmp.not.ROSCC_any = call i1 @__vecz_b_divergence_any(i1 %cmp.not.ROSCC) +; CHECK: br i1 %cmp.not.ROSCC_any, label %entry.ROSCC, label %entry.if.end17_crit_edge +entry: + %cosa = alloca float, align 4 + %call = tail call i64 @__mux_get_global_id(i32 0) #5 + %sext = mul i64 %call, 51539607552 + %idx.ext = ashr exact i64 %sext, 32 + %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %accum_ptr, i64 %idx.ext + %0 = load i32, ptr addrspace(1) %add.ptr, align 4 + %cmp.not = icmp slt i32 %0, %threshold + br i1 %cmp.not, label %entry.if.end17_crit_edge, label %if.then + +; CHECK: entry.ROSCC: +; CHECK: [[CMP_NOT_NOT:%.*]] = xor i1 [[CMP_NOT]], true +; CHECK: br label %if.then + +entry.if.end17_crit_edge: ; preds = %entry + br label %if.end17 + +; Ensure that only active lanes (masked by %cmp.not.not) contribute towards the +; %or.cond branch. +; CHECK: if.then: +; CHECK: call void @__vecz_b_masked_store4_fu3ptrb(float 0.000000e+00, ptr %cosa, i1 [[CMP_NOT_NOT]]) +; CHECK: %1 = call spir_func float @__vecz_b_masked__Z6sincosfPf(float 0.000000e+00, ptr nonnull %cosa, i1 [[CMP_NOT_NOT]]) #9 +; CHECK: %2 = call float @__vecz_b_masked_load4_fu3ptrb(ptr %cosa, i1 [[CMP_NOT_NOT]]) +; CHECK: %mul7 = fmul float %2, -2.950000e+01 +; CHECK: %cmp11 = fcmp uge float %mul7, 0.000000e+00 +; CHECK: %cmp14 = fcmp ult float %mul7, 6.400000e+01 +; CHECK: %or.cond = and i1 %cmp11, %cmp14 +; CHECK: %or.cond_active = and i1 %or.cond, [[CMP_NOT_NOT]] +; CHECK: %or.cond_active_any = call i1 @__vecz_b_divergence_any(i1 %or.cond_active) +; CHECK: br i1 %or.cond_active_any, label %if.else.crit_edge, label %if.then16 +if.then: ; preds = %entry + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cosa) #6 + store float 0.000000e+00, ptr %cosa, align 4 + %call4 = call spir_func float @_Z6sincosfPf(float 0.000000e+00, ptr nonnull %cosa) #7 + %1 = load float, ptr %cosa, align 4 + %mul7 = fmul float %1, -2.950000e+01 + %cmp11 = fcmp uge float %mul7, 0.000000e+00 + %cmp14 = fcmp ult float %mul7, 6.400000e+01 + %or.cond = and i1 %cmp11, %cmp14 + br i1 %or.cond, label %if.else.crit_edge, label %if.then16 + +if.else.crit_edge: ; preds = %if.then + br label %if.else + +if.then16: ; preds = %if.then + %sext2 = shl i64 %call, 32 + %idxprom = ashr exact i64 %sext2, 32 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idxprom + store float %mul7, ptr addrspace(1) %arrayidx, align 4 + br label %if.end + +if.else: + %arrayidx2 = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idxprom + store float 1.0, ptr addrspace(1) %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then16 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cosa) #6 + br label %if.end17 + +if.end17: ; preds = %entry.if.end17_crit_edge, %if.end + ret void +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind +declare spir_func float @_Z6sincosfPf(float, ptr) #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: alwaysinline norecurse nounwind memory(read) +declare i64 @__mux_get_global_id(i32) #3 + +attributes #0 = { norecurse nounwind "mux-kernel"="entry-point" "mux-local-mem-usage"="0" "mux-no-subgroups" "mux-orig-fn"="get_lines" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" "vecz-mode"="auto" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) "vecz-mode"="auto" } +attributes #2 = { nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "vecz-mode"="auto" } +attributes #3 = { alwaysinline norecurse nounwind memory(read) "vecz-mode"="auto" } +attributes #4 = { norecurse nounwind "mux-base-fn-name"="get_lines" "mux-kernel"="entry-point" "mux-local-mem-usage"="0" "mux-no-subgroups" "mux-orig-fn"="get_lines" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" "vecz-mode"="auto" } +attributes #5 = { alwaysinline norecurse nounwind memory(read) } +attributes #6 = { nounwind } +attributes #7 = { nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!2} +!opencl.spir.version = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 1, i32 2} +!10 = !{i32 2, i32 1, i32 1} diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization12.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization12.ll index f226c3eb5..e338a7352 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization12.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization12.ll @@ -512,7 +512,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFTHEN38]]: ; CHECK: %[[CMP42:.+]] = icmp slt i32 -; CHECK: br i1 %[[CMP42]], label %[[IFTHEN44:.+]], label %[[IFELSE46:.+]] +; CHECK: %[[CMP42_ACTIVE:.+]] = and i1 %[[CMP42]], {{%.*}} +; CHECK: %[[CMP42_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP42_ACTIVE]]) +; CHECK: br i1 %[[CMP42_ACTIVE_ANY]], label %[[IFTHEN44:.+]], label %[[IFELSE46:.+]] ; CHECK: [[IFTHEN44]]: ; CHECK: br label %[[IFELSE48:.+]] @@ -522,7 +524,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFELSE48]]: ; CHECK: %[[CMP49:.+]] = icmp -; CHECK: br i1 %[[CMP49]], label %[[IFTHEN51:.+]], label %[[IFELSE53:.+]] +; CHECK: %[[CMP49_ACTIVE:.+]] = and i1 %[[CMP49]], {{%.*}} +; CHECK: %[[CMP49_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP49_ACTIVE]]) +; CHECK: br i1 %[[CMP49_ACTIVE_ANY]], label %[[IFTHEN51:.+]], label %[[IFELSE53:.+]] ; CHECK: [[IFTHEN51]]: ; CHECK: br label %[[N58:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization13.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization13.ll index 5385a5ab9..a5150be43 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization13.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization13.ll @@ -198,7 +198,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFTHEN4]]: ; CHECK: %[[TMP:.+]] = and i64 %call1, 1 ; CHECK: %[[TRUNC:.+]] = icmp eq i64 %[[TMP]], 0 -; CHECK: br i1 %[[TRUNC]], label %[[SWBB8:.+]], label %[[SWBB:.+]] +; CHECK: %[[TRUNC_ACTIVE:.+]] = and i1 %[[TRUNC]], {{%.*}} +; CHECK: %[[TRUNC_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[TRUNC_ACTIVE]]) +; CHECK: br i1 %[[TRUNC_ACTIVE_ANY]], label %[[SWBB8:.+]], label %[[SWBB:.+]] ; CHECK: [[SWBB]]: ; CHECK: br label %[[SWBB8]] diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization17.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization17.ll index 4dd7317c2..fa8995233 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization17.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization17.ll @@ -275,7 +275,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[FORCOND28PREHEADER:.+]], label %[[IFELSE:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[FORCOND28PREHEADER:.+]], label %[[IFELSE:.+]] ; CHECK: [[FORCOND28PREHEADER]]: ; CHECK: br label %[[WHILEBODYPUREEXIT:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization18.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization18.ll index 3fc928055..42ef4ead7 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization18.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization18.ll @@ -218,7 +218,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] ; CHECK: [[IFTHEN]]: ; CHECK: br label %[[WHILEBODYPUREEXIT:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization19.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization19.ll index b5c52ad8c..171e67318 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization19.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization19.ll @@ -231,7 +231,9 @@ attributes #2 = { convergent nobuiltin nounwind readonly } ; CHECK: [[WHILEBODY]]: ; CHECK: %[[CMP:.+]] = icmp -; CHECK: br i1 %[[CMP]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] +; CHECK: %[[CMP_ACTIVE:.+]] = and i1 %[[CMP]], {{%.*}} +; CHECK: %[[CMP_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP_ACTIVE]]) +; CHECK: br i1 %[[CMP_ACTIVE_ANY]], label %[[IFTHEN:.+]], label %[[IFEND:.+]] ; CHECK: [[IFTHEN]]: ; CHECK: %[[CMP2:.+]] = icmp diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization5.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization5.ll index 520b069f5..9db40f122 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization5.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization5.ll @@ -187,7 +187,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[IFELSE5]]: ; CHECK: %[[CMP7:.+]] = icmp -; CHECK: br i1 %[[CMP7]], label %[[IFTHEN]], label %[[FORCOND14PREHEADER:.+]] +; CHECK: %[[CMP7_ACTIVE:.+]] = and i1 %[[CMP7]], {{%.*}} +; CHECK: %[[CMP7_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP7_ACTIVE]]) +; CHECK: br i1 %[[CMP7_ACTIVE_ANY]], label %[[IFTHEN]], label %[[FORCOND14PREHEADER:.+]] ; CHECK: [[FORCOND14PREHEADER]]: ; CHECK: br label %[[FORCOND14:.+]] diff --git a/modules/compiler/vecz/test/lit/llvm/partial_linearization7.ll b/modules/compiler/vecz/test/lit/llvm/partial_linearization7.ll index 13bbb4131..4a84dbf04 100644 --- a/modules/compiler/vecz/test/lit/llvm/partial_linearization7.ll +++ b/modules/compiler/vecz/test/lit/llvm/partial_linearization7.ll @@ -204,7 +204,9 @@ attributes #2 = { nobuiltin nounwind readonly } ; CHECK: [[E]]: ; CHECK: %[[CMP13:.+]] = icmp -; CHECK: br i1 %[[CMP13]], label %[[G:.+]], label %[[H:.+]] +; CHECK: %[[CMP13_ACTIVE:.+]] = and i1 %[[CMP13]], {{%.*}} +; CHECK: %[[CMP13_ACTIVE_ANY:.+]] = call i1 @__vecz_b_divergence_any(i1 %[[CMP13_ACTIVE]]) +; CHECK: br i1 %[[CMP13_ACTIVE_ANY]], label %[[G:.+]], label %[[H:.+]] ; CHECK: [[G]]: ; CHECK: br label %[[FORCOND19:.+]]