-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm] Improve TLI for Darwin libsystem_m functions #109479
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Jon Roelofs (jroelofs) Changes... to ensure we can vectorize these under -veclib=Darwin_libsystem_m Full diff: https://github.com/llvm/llvm-project/pull/109479.diff 2 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 9e543b844ad768..222ab9f010943d 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -408,35 +408,42 @@ class TargetLibraryInfo {
switch (F) {
default: break;
// clang-format off
- case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
- case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
- case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
- case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
- case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
- case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl:
case LibFunc_acos: case LibFunc_acosf: case LibFunc_acosl:
+ case LibFunc_acosh: case LibFunc_acoshf: case LibFunc_acoshl:
+ case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl:
+ case LibFunc_asinh: case LibFunc_asinhf: case LibFunc_asinhl:
+ case LibFunc_atan2: case LibFunc_atan2f: case LibFunc_atan2l:
case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl:
- case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl:
+ case LibFunc_atanh: case LibFunc_atanhf: case LibFunc_atanhl:
+ case LibFunc_cbrt: case LibFunc_cbrtf: case LibFunc_cbrtl:
+ case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
+ case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
+ case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl:
- case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl:
- case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
- case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
- case LibFunc_sqrtl_finite:
+ case LibFunc_erf: case LibFunc_erff: case LibFunc_erfl:
+ case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
+ case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
+ case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl:
case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl:
- case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
+ case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl:
+ case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
+ case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
+ case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl:
- case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
+ case LibFunc_pow: case LibFunc_powf: case LibFunc_powl:
case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl:
case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl:
- case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
- case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
- case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
- case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl:
- case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
- case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
+ case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
+ case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl:
+ case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
+ case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
+ case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl:
+ case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
// clang-format on
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll
new file mode 100644
index 00000000000000..fd56d03d678846
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll
@@ -0,0 +1,43 @@
+; RUN: opt -passes='default<O2>' -vector-library=Darwin_libsystem_m -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+declare float @atan2f(float, float)
+
+define void @foo(ptr noalias nocapture %ptrA,
+ ptr noalias nocapture readonly %ptrB,
+ ptr noalias nocapture readonly %ptrC,
+ i64 %size) {
+; CHECK-LABEL: @foo(
+; CHECK: call <4 x float> @_simd_atan2_f4(<4 x float>
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %exitcond = icmp eq i64 %indvars.iv, %size
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr inbounds float, ptr %ptrB, i64 %indvars.iv
+ %src1 = load float, ptr %arrayidx, align 4
+
+ %arrayidx2 = getelementptr inbounds float, ptr %ptrC, i64 %indvars.iv
+ %src2 = load float, ptr %arrayidx, align 4
+
+ %arrayidx3 = getelementptr inbounds float, ptr %ptrA, i64 %indvars.iv
+
+ %phase = call float @atan2f(float %src1, float %src2)
+
+ store float %phase, ptr %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond, !llvm.loop !0
+
+for.cond.cleanup: ; preds = %for.cond
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
|
llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll
Outdated
Show resolved
Hide resolved
e4c7350
to
95b6d97
Compare
@@ -0,0 +1,43 @@ | |||
; RUN: opt -vector-library=Darwin_libsystem_m -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s | |||
|
|||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if adding a new test file is the right approach.
Most of the other tests live in:
llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll
Why would atan2 get its own file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we get atan2 added to isTriviallyVectorizable please?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, I can address both of these. maybe sometime next week.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tex3d is working on a PR that would add atan2 to isTriviallyVectorizable
. He might have some perspective on this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if adding a new test file is the right approach.
fixed this by moving the test into an assertion inside addVectorizableFunctions
. Now it's a bit harder to miss the entry in hasOptimizedCodeGen
when adding new ones.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
feel free to pick e73b49a into your PR, and I'll revert it from mine, and then resurrect the test that covers the hasOptimizedCodeGen
bit that I removed because of #109479 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The changes I pushed to my PR included adding atan2 cases to hasOptimizedCodeGen
in llvm/include/llvm/Analysis/TargetLibraryInfo.h
. Should I revert that one change? I'm currently testing without it to see if it has no measurable effect on my PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can keep them. now that that table is sorted, we shouldn't have much of a (or any?) merge conflict.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
95b6d97
to
6036c17
Compare
landed the first bit as: 75c1c26 |
... by ensuring parity between hasOptimizedCodegen and addVectorizableFunctionsFromVecLib
15aff70
to
e73b49a
Compare
This reverts commit e73b49a. we're moving that over to @tex3d's PR: llvm#109479 (comment)
... to ensure we can vectorize these under -veclib=Darwin_libsystem_m