[llvm] Improve TLI for Darwin libsystem_m functions #109479

jroelofs · 2024-09-20T21:07:25Z

... to ensure we can vectorize these under -veclib=Darwin_libsystem_m

llvmbot · 2024-09-20T21:07:56Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Jon Roelofs (jroelofs)

Changes

... to ensure we can vectorize these under -veclib=Darwin_libsystem_m

Full diff: https://github.com/llvm/llvm-project/pull/109479.diff

2 Files Affected:

(modified) llvm/include/llvm/Analysis/TargetLibraryInfo.h (+26-19)
(added) llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll (+43)

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 9e543b844ad768..222ab9f010943d 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -408,35 +408,42 @@ class TargetLibraryInfo {
     switch (F) {
     default: break;
       // clang-format off
-    case LibFunc_copysign:     case LibFunc_copysignf:  case LibFunc_copysignl:
-    case LibFunc_fabs:         case LibFunc_fabsf:      case LibFunc_fabsl:
-    case LibFunc_sin:          case LibFunc_sinf:       case LibFunc_sinl:
-    case LibFunc_cos:          case LibFunc_cosf:       case LibFunc_cosl:
-    case LibFunc_tan:          case LibFunc_tanf:       case LibFunc_tanl:
-    case LibFunc_asin:         case LibFunc_asinf:      case LibFunc_asinl:
     case LibFunc_acos:         case LibFunc_acosf:      case LibFunc_acosl:
+    case LibFunc_acosh:        case LibFunc_acoshf:     case LibFunc_acoshl:
+    case LibFunc_asin:         case LibFunc_asinf:      case LibFunc_asinl:
+    case LibFunc_asinh:        case LibFunc_asinhf:     case LibFunc_asinhl:
+    case LibFunc_atan2:        case LibFunc_atan2f:     case LibFunc_atan2l:
     case LibFunc_atan:         case LibFunc_atanf:      case LibFunc_atanl:
-    case LibFunc_sinh:         case LibFunc_sinhf:      case LibFunc_sinhl:
+    case LibFunc_atanh:        case LibFunc_atanhf:     case LibFunc_atanhl:
+    case LibFunc_cbrt:         case LibFunc_cbrtf:      case LibFunc_cbrtl:
+    case LibFunc_ceil:         case LibFunc_ceilf:      case LibFunc_ceill:
+    case LibFunc_copysign:     case LibFunc_copysignf:  case LibFunc_copysignl:
+    case LibFunc_cos:          case LibFunc_cosf:       case LibFunc_cosl:
     case LibFunc_cosh:         case LibFunc_coshf:      case LibFunc_coshl:
-    case LibFunc_tanh:         case LibFunc_tanhf:      case LibFunc_tanhl:
-    case LibFunc_sqrt:         case LibFunc_sqrtf:      case LibFunc_sqrtl:
-    case LibFunc_sqrt_finite:  case LibFunc_sqrtf_finite:
-                                                   case LibFunc_sqrtl_finite:
+    case LibFunc_erf:          case LibFunc_erff:       case LibFunc_erfl:
+    case LibFunc_exp2:         case LibFunc_exp2f:      case LibFunc_exp2l:
+    case LibFunc_fabs:         case LibFunc_fabsf:      case LibFunc_fabsl:
+    case LibFunc_floor:        case LibFunc_floorf:     case LibFunc_floorl:
     case LibFunc_fmax:         case LibFunc_fmaxf:      case LibFunc_fmaxl:
     case LibFunc_fmin:         case LibFunc_fminf:      case LibFunc_fminl:
-    case LibFunc_floor:        case LibFunc_floorf:     case LibFunc_floorl:
+    case LibFunc_ldexp:        case LibFunc_ldexpf:     case LibFunc_ldexpl:
+    case LibFunc_log2:         case LibFunc_log2f:      case LibFunc_log2l:
+    case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
+    case LibFunc_memcpy:       case LibFunc_memset:     case LibFunc_memmove:
     case LibFunc_nearbyint:    case LibFunc_nearbyintf: case LibFunc_nearbyintl:
-    case LibFunc_ceil:         case LibFunc_ceilf:      case LibFunc_ceill:
+    case LibFunc_pow:          case LibFunc_powf:       case LibFunc_powl:
     case LibFunc_rint:         case LibFunc_rintf:      case LibFunc_rintl:
     case LibFunc_round:        case LibFunc_roundf:     case LibFunc_roundl:
-    case LibFunc_trunc:        case LibFunc_truncf:     case LibFunc_truncl:
-    case LibFunc_log2:         case LibFunc_log2f:      case LibFunc_log2l:
-    case LibFunc_exp2:         case LibFunc_exp2f:      case LibFunc_exp2l:
-    case LibFunc_ldexp:        case LibFunc_ldexpf:     case LibFunc_ldexpl:
-    case LibFunc_memcpy:       case LibFunc_memset:     case LibFunc_memmove:
-    case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
+    case LibFunc_sin:          case LibFunc_sinf:       case LibFunc_sinl:
+    case LibFunc_sinh:         case LibFunc_sinhf:      case LibFunc_sinhl:
+    case LibFunc_sqrt:         case LibFunc_sqrtf:      case LibFunc_sqrtl:
+    case LibFunc_sqrt_finite:  case LibFunc_sqrtf_finite:
+                                                   case LibFunc_sqrtl_finite:
     case LibFunc_strcpy:       case LibFunc_stpcpy:     case LibFunc_strlen:
     case LibFunc_strnlen:      case LibFunc_memchr:     case LibFunc_mempcpy:
+    case LibFunc_tan:          case LibFunc_tanf:       case LibFunc_tanl:
+    case LibFunc_tanh:         case LibFunc_tanhf:      case LibFunc_tanhl:
+    case LibFunc_trunc:        case LibFunc_truncf:     case LibFunc_truncl:
       // clang-format on
       return true;
     }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll
new file mode 100644
index 00000000000000..fd56d03d678846
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll
@@ -0,0 +1,43 @@
+; RUN: opt -vector-library=Darwin_libsystem_m -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+declare float @atan2f(float, float)
+
+define void @foo(ptr noalias nocapture %ptrA,
+                 ptr noalias nocapture readonly %ptrB,
+                 ptr noalias nocapture readonly %ptrC,
+                 i64 %size) {
+; CHECK-LABEL: @foo(
+; CHECK: call <4 x float> @_simd_atan2_f4(<4 x float>
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %exitcond = icmp eq i64 %indvars.iv, %size
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds float, ptr %ptrB, i64 %indvars.iv
+  %src1 = load float, ptr %arrayidx, align 4
+  ; The second atan2f operand is read from %ptrC (via %arrayidx2), not %ptrB.
+  %arrayidx2 = getelementptr inbounds float, ptr %ptrC, i64 %indvars.iv
+  %src2 = load float, ptr %arrayidx2, align 4
+
+  %arrayidx3 = getelementptr inbounds float, ptr %ptrA, i64 %indvars.iv
+
+  %phase = call float @atan2f(float %src1, float %src2)
+
+  store float %phase, ptr %arrayidx3, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond, !llvm.loop !0
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}

llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll

llvm/include/llvm/Analysis/TargetLibraryInfo.h

farzonl · 2024-09-30T22:35:20Z

llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll

@@ -0,0 +1,43 @@
+; RUN: opt -vector-library=Darwin_libsystem_m -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"


I don't know if adding a new test file is the right approach.
Most of the other tests live in:
llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll

Why would atan2 get its own file?

Can we get atan2 added to isTriviallyVectorizable please?

sure, I can address both of these. maybe sometime next week.

@tex3d is working on a PR that would add atan2 to isTriviallyVectorizable. He might have some perspective on this PR.

I don't know if adding a new test file is the right approach.

fixed this by moving the test into an assertion inside addVectorizableFunctions. Now it's a bit harder to miss the entry in hasOptimizedCodeGen when adding new ones.

feel free to pick e73b49a into your PR, and I'll revert it from mine, and then resurrect the test that covers the hasOptimizedCodeGen bit that I removed because of #109479 (comment)

The changes I pushed to my PR included adding atan2 cases to hasOptimizedCodeGen in llvm/include/llvm/Analysis/TargetLibraryInfo.h. Should I revert that one change? I'm currently testing without it to see if it has no measurable effect on my PR.

you can keep them. now that that table is sorted, we shouldn't have much of a (or any?) merge conflict.

#113637 (review)

jroelofs · 2024-10-25T23:16:38Z

landed the first bit as: 75c1c26

... by ensuring parity between hasOptimizedCodeGen and addVectorizableFunctionsFromVecLib

@tex3d

This reverts commit e73b49a. we're moving that over to @tex3d's PR: llvm#109479 (comment)

jroelofs requested review from fhahn and RKSimon September 20, 2024 21:07

llvmbot added llvm:analysis llvm:transforms labels Sep 20, 2024

fhahn reviewed Sep 25, 2024

View reviewed changes

llvm/test/Transforms/LoopVectorize/AArch64/vectorize-atan2-darwin.ll Outdated Show resolved Hide resolved

llvm/include/llvm/Analysis/TargetLibraryInfo.h Show resolved Hide resolved

jroelofs force-pushed the jroelofs/atan2-vectorize branch from e4c7350 to 95b6d97 Compare September 25, 2024 15:23

farzonl reviewed Sep 30, 2024

View reviewed changes

jroelofs force-pushed the jroelofs/atan2-vectorize branch from 95b6d97 to 6036c17 Compare October 25, 2024 21:20

jroelofs added 2 commits October 25, 2024 16:16

[llvm] Improve TLI for Darwin libsystem_m functions

d6c0089

... by ensuring parity between hasOptimizedCodeGen and addVectorizableFunctionsFromVecLib

[llvm][TLI] Add Accelerate.framework's vatan2f libfunc

e73b49a

jroelofs force-pushed the jroelofs/atan2-vectorize branch from 15aff70 to e73b49a Compare October 25, 2024 23:17

jroelofs added 3 commits November 5, 2024 18:57

Revert "[llvm][TLI] Add Accelerate.framework's vatan2f libfunc"

e74abd1

This reverts commit e73b49a. we're moving that over to @tex3d's PR: llvm#109479 (comment)

Merge branch 'main' into jroelofs/atan2-vectorize

cde6c71

add more libfuncs that the assertion catches

242dadc

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[llvm] Improve TLI for Darwin libsystem_m functions #109479

[llvm] Improve TLI for Darwin libsystem_m functions #109479

jroelofs commented Sep 20, 2024

llvmbot commented Sep 20, 2024 •

edited

Loading

farzonl Sep 30, 2024

RKSimon Oct 3, 2024

jroelofs Oct 3, 2024

farzonl Oct 3, 2024

jroelofs Oct 25, 2024

jroelofs Oct 28, 2024

tex3d Oct 28, 2024

jroelofs Oct 28, 2024

jroelofs Oct 29, 2024

tex3d Oct 29, 2024

jroelofs commented Oct 25, 2024

		@@ -0,0 +1,43 @@
		; RUN: opt -vector-library=Darwin_libsystem_m -passes=inject-tli-mappings,loop-vectorize -S < %s \| FileCheck %s

		target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

[llvm] Improve TLI for Darwin libsystem_m functions #109479

Are you sure you want to change the base?

[llvm] Improve TLI for Darwin libsystem_m functions #109479

Conversation

jroelofs commented Sep 20, 2024

llvmbot commented Sep 20, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

jroelofs commented Oct 25, 2024

llvmbot commented Sep 20, 2024 •

edited

Loading