Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Analysis] atan2: isTriviallyVectorizable; add to massv and accelerate veclibs #113637

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/Analysis/TargetLibraryInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ class TargetLibraryInfo {
// clang-format off
case LibFunc_acos: case LibFunc_acosf: case LibFunc_acosl:
case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl:
case LibFunc_atan2: case LibFunc_atan2f: case LibFunc_atan2l:
case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl:
case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ class TargetTransformInfoImplBase {
Name == "asin" || Name == "asinf" || Name == "asinl" ||
Name == "acos" || Name == "acosf" || Name == "acosl" ||
Name == "atan" || Name == "atanf" || Name == "atanl" ||
Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
Name == "cosh" || Name == "coshf" || Name == "coshl" ||
Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.acos.f32", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2f", "vatan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "vatan2f", FIXED(4), "_ZGV_LLVM_N4vv")

// Hyperbolic Functions
TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4), "_ZGV_LLVM_N4v")
Expand Down Expand Up @@ -289,7 +291,9 @@ TLI_DEFINE_VECFUNC("acosf", "__acosf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan", "__atand2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanf", "__atanf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2", "__atan2d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "__atan2d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atan2f", "__atan2f4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "__atan2f4", FIXED(4), "_ZGV_LLVM_N4vv")

// Hyperbolic Functions
TLI_DEFINE_VECFUNC("sinh", "__sinhd2", FIXED(2), "_ZGV_LLVM_N2v")
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4230,6 +4230,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
case LibFunc_atanf:
case LibFunc_atanl:
return Intrinsic::atan;
case LibFunc_atan2:
case LibFunc_atan2f:
case LibFunc_atan2l:
return Intrinsic::atan2;
case LibFunc_sinh:
case LibFunc_sinhf:
case LibFunc_sinhl:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::asin:
case Intrinsic::acos:
case Intrinsic::atan:
case Intrinsic::atan2:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::tan:
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1244,6 +1244,52 @@ for.end:
ret void
}

; Loop-vectorizer input for the f64 atan2 intrinsic.
; The loop runs %iv from 0 until %iv.next equals 1000. Each iteration
; truncates %iv to i32, converts it to double, passes that value as both
; operands of @llvm.atan2.f64 and stores the result to %varray[%iv].
; The directives below expect a call to __atan2d2 on <2 x double>.
define void @atan2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan2_f64_intrinsic(
; CHECK: __atan2d2{{.*}}<2 x double>
; CHECK: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to double
; Same value is used for both atan2 operands.
%call = tail call double @llvm.atan2.f64(double %conv, double %conv)
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
store double %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

; Loop-vectorizer input for the f32 atan2 intrinsic.
; The loop runs %iv from 0 until %iv.next equals 1000. Each iteration
; truncates %iv to i32, converts it to float, passes that value as both
; operands of @llvm.atan2.f32 and stores the result to %varray[%iv].
; The directives below expect a call to __atan2f4 on <4 x float>.
define void @atan2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan2_f32_intrinsic(
; CHECK: __atan2f4{{.*}}<4 x float>
; CHECK: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to float
; Same value is used for both atan2 operands.
%call = tail call float @llvm.atan2.f32(float %conv, float %conv)
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
store float %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @sinh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64(
; CHECK: __sinhd2{{.*}}<2 x double>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,103 @@ entry:
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @atan2f(float,float) readonly nounwind willreturn
; Four scalar fast-math calls to the atan2f library function, one per lane
; of the <4 x float> values loaded from %a and %b. The results are
; reassembled with insertelement, starting from a poison vector.
; The first set of directives expects a single call to @vatan2f on the two
; loaded vectors. The second set expects lanes 0-1 to remain scalar
; @atan2f calls and lanes 2-3 to become one @llvm.atan2.v2f32 call.
define <4 x float> @atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%bb = load <4 x float>, ptr %b, align 16
; Lane 0
%vecext = extractelement <4 x float> %0, i32 0
%vecextb = extractelement <4 x float> %bb, i32 0
%1 = tail call fast float @atan2f(float %vecext, float %vecextb)
%vecins = insertelement <4 x float> poison, float %1, i32 0
; Lane 1
%vecext.1 = extractelement <4 x float> %0, i32 1
%vecextb.1 = extractelement <4 x float> %bb, i32 1
%2 = tail call fast float @atan2f(float %vecext.1, float %vecextb.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
; Lane 2
%vecext.2 = extractelement <4 x float> %0, i32 2
%vecextb.2 = extractelement <4 x float> %bb, i32 2
%3 = tail call fast float @atan2f(float %vecext.2, float %vecextb.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
; Lane 3
%vecext.3 = extractelement <4 x float> %0, i32 3
%vecextb.3 = extractelement <4 x float> %bb, i32 3
%4 = tail call fast float @atan2f(float %vecext.3, float %vecextb.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
; Four scalar fast-math calls to the @llvm.atan2.f32 intrinsic, one per
; lane of the <4 x float> values loaded from %a and %b. The results are
; reassembled with insertelement, starting from a poison vector.
; The first set of directives expects a single call to @vatan2f on the two
; loaded vectors. The second set expects lanes 0-1 to remain scalar
; @llvm.atan2.f32 calls and lanes 2-3 to become one @llvm.atan2.v2f32 call.
define <4 x float> @int_atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @int_atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%bb = load <4 x float>, ptr %b, align 16
; Lane 0
%vecext = extractelement <4 x float> %0, i32 0
%vecextb = extractelement <4 x float> %bb, i32 0
%1 = tail call fast float @llvm.atan2.f32(float %vecext, float %vecextb)
%vecins = insertelement <4 x float> poison, float %1, i32 0
; Lane 1
%vecext.1 = extractelement <4 x float> %0, i32 1
%vecextb.1 = extractelement <4 x float> %bb, i32 1
%2 = tail call fast float @llvm.atan2.f32(float %vecext.1, float %vecextb.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
; Lane 2
%vecext.2 = extractelement <4 x float> %0, i32 2
%vecextb.2 = extractelement <4 x float> %bb, i32 2
%3 = tail call fast float @llvm.atan2.f32(float %vecext.2, float %vecextb.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
; Lane 3
%vecext.3 = extractelement <4 x float> %0, i32 3
%vecextb.3 = extractelement <4 x float> %bb, i32 3
%4 = tail call fast float @llvm.atan2.f32(float %vecext.3, float %vecextb.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @sinhf(float) readonly nounwind willreturn
define <4 x float> @sinh_4x(ptr %a) {
; CHECK-LABEL: @sinh_4x(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,103 @@ entry:
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @atan2f(float,float) readonly nounwind willreturn
; Four scalar fast-math calls to the atan2f library function, one per lane
; of the <4 x float> values loaded from %a and %b. The results are
; reassembled with insertelement, starting from an undef vector.
; The first set of directives expects a single call to @vatan2f on the two
; loaded vectors. The second set expects lanes 0-1 to remain scalar
; @atan2f calls and lanes 2-3 to become one @llvm.atan2.v2f32 call.
define <4 x float> @atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%bb = load <4 x float>, ptr %b, align 16
; Lane 0
%vecext = extractelement <4 x float> %0, i32 0
%vecextb = extractelement <4 x float> %bb, i32 0
%1 = tail call fast float @atan2f(float %vecext, float %vecextb)
%vecins = insertelement <4 x float> undef, float %1, i32 0
; Lane 1
%vecext.1 = extractelement <4 x float> %0, i32 1
%vecextb.1 = extractelement <4 x float> %bb, i32 1
%2 = tail call fast float @atan2f(float %vecext.1, float %vecextb.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
; Lane 2
%vecext.2 = extractelement <4 x float> %0, i32 2
%vecextb.2 = extractelement <4 x float> %bb, i32 2
%3 = tail call fast float @atan2f(float %vecext.2, float %vecextb.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
; Lane 3
%vecext.3 = extractelement <4 x float> %0, i32 3
%vecextb.3 = extractelement <4 x float> %bb, i32 3
%4 = tail call fast float @atan2f(float %vecext.3, float %vecextb.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
; Four scalar fast-math calls to the @llvm.atan2.f32 intrinsic, one per
; lane of the <4 x float> values loaded from %a and %b. The results are
; reassembled with insertelement, starting from an undef vector.
; The first set of directives expects a single call to @vatan2f on the two
; loaded vectors. The second set expects lanes 0-1 to remain scalar
; @llvm.atan2.f32 calls and lanes 2-3 to become one @llvm.atan2.v2f32 call.
define <4 x float> @int_atan2_4x(ptr %a, ptr %b) {
; CHECK-LABEL: @int_atan2_4x(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_atan2_4x(
; NOACCELERATE-NEXT: entry:
; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%bb = load <4 x float>, ptr %b, align 16
; Lane 0
%vecext = extractelement <4 x float> %0, i32 0
%vecextb = extractelement <4 x float> %bb, i32 0
%1 = tail call fast float @llvm.atan2.f32(float %vecext, float %vecextb)
%vecins = insertelement <4 x float> undef, float %1, i32 0
; Lane 1
%vecext.1 = extractelement <4 x float> %0, i32 1
%vecextb.1 = extractelement <4 x float> %bb, i32 1
%2 = tail call fast float @llvm.atan2.f32(float %vecext.1, float %vecextb.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
; Lane 2
%vecext.2 = extractelement <4 x float> %0, i32 2
%vecextb.2 = extractelement <4 x float> %bb, i32 2
%3 = tail call fast float @llvm.atan2.f32(float %vecext.2, float %vecextb.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
; Lane 3
%vecext.3 = extractelement <4 x float> %0, i32 3
%vecextb.3 = extractelement <4 x float> %bb, i32 3
%4 = tail call fast float @llvm.atan2.f32(float %vecext.3, float %vecextb.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @sinhf(float) readonly nounwind willreturn
define <4 x float> @sinh_4x(ptr %a) {
; CHECK-LABEL: @sinh_4x(
Expand Down
Loading