From 800a47d6cd33ea1c2a888ceb67d566366c61e7ed Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 10:17:16 -0400 Subject: [PATCH 01/39] [libc++][NFC] Fix include guards inside locale_base_api --- libcxx/include/__locale_dir/locale_base_api/android.h | 6 +++--- .../__locale_dir/locale_base_api/bsd_locale_defaults.h | 6 +++--- .../__locale_dir/locale_base_api/bsd_locale_fallbacks.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/fuchsia.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/ibm.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/locale_guard.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/musl.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/newlib.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/openbsd.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/win32.h | 6 +++--- 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/libcxx/include/__locale_dir/locale_base_api/android.h b/libcxx/include/__locale_dir/locale_base_api/android.h index 9965d8bbf6a2ecc..08ef5407dedf4e0 100644 --- a/libcxx/include/__locale_dir/locale_base_api/android.h +++ b/libcxx/include/__locale_dir/locale_base_api/android.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H #include @@ -47,4 +47,4 @@ inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr # endif // __NDK_MAJOR__ <= 16 #endif // __has_include() -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h index 1f9607209842cad..e88eb4fa41d7af9 100644 --- 
a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h @@ -11,8 +11,8 @@ // we will define the mapping from an internal macro to the real BSD symbol. //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -33,4 +33,4 @@ #define __libcpp_asprintf_l(...) asprintf_l(__VA_ARGS__) #define __libcpp_sscanf_l(...) sscanf_l(__VA_ARGS__) -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index 76b94287cd6cc88..5f99c7aea02a96a 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -10,8 +10,8 @@ // of those functions for non-BSD platforms. 
//===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H #include <__locale_dir/locale_base_api/locale_guard.h> #include @@ -123,4 +123,4 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H diff --git a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h index 4c3440f981c6d08..f6ef454ba7ada75 100644 --- a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h +++ b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H #include <__support/xlocale/__posix_l_fallback.h> #include <__support/xlocale/__strtonum_fallback.h> #include #include -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h index fa3bc1c3633f5dc..1d1d15df9f7995e 100644 --- a/libcxx/include/__locale_dir/locale_base_api/ibm.h +++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#ifndef 
_LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H #if defined(__MVS__) # include <__support/ibm/locale_mgmt_zos.h> @@ -105,4 +105,4 @@ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char return str_size; } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h index 2baacb51cd06555..7d15f2d253adc39 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H #include <__config> #include <__locale> // for locale_t @@ -75,4 +75,4 @@ struct __libcpp_locale_guard { _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h index bf7b849d5863421..1653214cdba1e39 100644 --- a/libcxx/include/__locale_dir/locale_base_api/musl.h +++ b/libcxx/include/__locale_dir/locale_base_api/musl.h @@ -14,8 +14,8 @@ // in Musl. 
//===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H #include #include @@ -28,4 +28,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c return ::strtoull(__nptr, __endptr, __base); } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/newlib.h index a8c1cff16e6d800..7da10e5889843dd 100644 --- a/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ b/libcxx/include/__locale_dir/locale_base_api/newlib.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H diff --git a/libcxx/include/__locale_dir/locale_base_api/openbsd.h b/libcxx/include/__locale_dir/locale_base_api/openbsd.h index 0c05d6a0f788747..d4fb224e0c80a09 100644 --- a/libcxx/include/__locale_dir/locale_base_api/openbsd.h +++ b/libcxx/include/__locale_dir/locale_base_api/openbsd.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H #include <__support/xlocale/__strtonum_fallback.h> #include @@ -16,4 +16,4 @@ #include #include -#endif // 
_LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H diff --git a/libcxx/include/__locale_dir/locale_base_api/win32.h b/libcxx/include/__locale_dir/locale_base_api/win32.h index f66baffb6920456..f488a0dc0d69b3f 100644 --- a/libcxx/include/__locale_dir/locale_base_api/win32.h +++ b/libcxx/include/__locale_dir/locale_base_api/win32.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H #include <__config> #include @@ -232,4 +232,4 @@ _LIBCPP_EXPORTED_FROM_ABI int vasprintf_l(char** __ret, locale_t __loc, const ch // not-so-pressing FIXME: use locale to determine blank characters inline int iswblank_l(wint_t __c, locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H From 577c7dd7cc4c5a9f62f9654cfa30ee9d55709426 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 25 Oct 2024 15:20:24 +0100 Subject: [PATCH 02/39] [AArch64] Add a phase-ordering test for vectorizing predicated selects. 
NFC --- .../AArch64/predicated-reduction.ll | 294 ++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll new file mode 100644 index 000000000000000..7274e952567693d --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default<O3>" -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64" + +define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) { +; CHECK-LABEL: define nofpclass(nan inf) double @monte_simple( +; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr nocapture noundef readonly [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, 
%[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]] +; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]] +; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD5:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret double [[ADD5]] +; +entry: + %nblocks.addr = alloca i32, align 4 + %RAND_BLOCK_LENGTH.addr = alloca i32, align 4 + %samples.addr = alloca ptr, align 8 + %Y.addr = alloca double, align 8 + %Z.addr = alloca double, align 8 + %i = alloca i32, align 4 + %block = alloca i32, align 4 + %rngVal = alloca double, align 8 + %callValue = alloca double, align 8 + %v0 = alloca double, align 8 + %v1 = alloca double, align 8 + store i32 %nblocks, ptr %nblocks.addr, align 4 + store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4 + store ptr %samples, ptr %samples.addr, align 8 + store double %Y, ptr %Y.addr, align 
8 + store double %Z, ptr %Z.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2 + call void @llvm.lifetime.start.p0(i64 4, ptr %block) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #2 + store double 0.000000e+00, ptr %v0, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #2 + store double 0.000000e+00, ptr %v1, align 8 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %1 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load ptr, ptr %samples.addr, align 8 + %3 = load i32, ptr %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom + %4 = load float, ptr %arrayidx, align 4 + %conv = fpext float %4 to double + store double %conv, ptr %rngVal, align 8 + %5 = load double, ptr %Y.addr, align 8 + %6 = load double, ptr %rngVal, align 8 + %mul = fmul fast double %5, %6 + %7 = load double, ptr %Z.addr, align 8 + %sub = fsub fast double %mul, %7 + store double %sub, ptr %callValue, align 8 + %8 = load double, ptr %callValue, align 8 + %cmp1 = fcmp fast ogt double %8, 0.000000e+00 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %9 = load double, ptr %callValue, align 8 + %10 = load double, ptr %v0, align 8 + %add = fadd fast double %10, %9 + store double %add, ptr %v0, align 8 + %11 = load double, ptr %callValue, align 8 + %12 = load double, ptr %callValue, align 8 + %mul3 = fmul fast double %11, %12 + %13 = load double, ptr %v1, align 8 + %add4 = fadd fast double %13, %mul3 + store double %add4, ptr %v1, align 8 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %14 = load i32, ptr %i, align 
4 + %inc = add nsw i32 %14, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %15 = load double, ptr %v0, align 8 + %16 = load double, ptr %v1, align 8 + %add5 = fadd fast double %15, %16 + call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #2 + call void @llvm.lifetime.end.p0(i64 4, ptr %block) #2 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2 + ret double %add5 +} + +define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) { +; CHECK-LABEL: define nofpclass(nan inf) double @monte_exp( +; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[NBLOCKS]], 0 +; CHECK-NEXT: br i1 [[CMP16]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END10:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[CMP211:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0 +; CHECK-NEXT: br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY_US_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY_US:.*]] +; CHECK: [[FOR_BODY_US]]: +; CHECK-NEXT: [[V1_019_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: [[V0_018_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], 
%[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]]) +; CHECK-NEXT: br label %[[FOR_BODY3_US:.*]] +; CHECK: [[FOR_BODY3_US]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[V1_114_US:%.*]] = phi double [ [[V1_019_US]], %[[FOR_BODY_US]] ], [ [[V1_2_US]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[V0_113_US:%.*]] = phi double [ [[V0_018_US]], %[[FOR_BODY_US]] ], [ [[V0_2_US]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[CONV_US:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]]) +; CHECK-NEXT: [[MUL_US:%.*]] = fmul fast double [[TMP1]], [[Y]] +; CHECK-NEXT: [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]] +; CHECK-NEXT: [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00 +; CHECK-NEXT: [[ADD_US:%.*]] = fadd fast double [[SUB_US]], [[V0_113_US]] +; CHECK-NEXT: [[MUL6_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]] +; CHECK-NEXT: [[ADD7_US:%.*]] = fadd fast double [[MUL6_US]], [[V1_114_US]] +; CHECK-NEXT: [[V0_2_US]] = select i1 [[CMP4_US]], double [[ADD_US]], double [[V0_113_US]] +; CHECK-NEXT: [[V1_2_US]] = select i1 [[CMP4_US]], double [[ADD7_US]], double [[V1_114_US]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND25_NOT]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US]] +; CHECK: [[FOR_COND1_FOR_INC8_CRIT_EDGE_US]]: +; CHECK-NEXT: [[INC9_US]] = add nuw nsw i32 [[BLOCK_017_US]], 1 +; CHECK-NEXT: [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC9_US]], 
[[NBLOCKS]] +; CHECK-NEXT: br i1 [[EXITCOND26_NOT]], label %[[FOR_END10]], label %[[FOR_BODY_US]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[BLOCK_017:%.*]] = phi i32 [ [[INC9:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]]) +; CHECK-NEXT: [[INC9]] = add nuw nsw i32 [[BLOCK_017]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC9]], [[NBLOCKS]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END10]], label %[[FOR_BODY]] +; CHECK: [[FOR_END10]]: +; CHECK-NEXT: [[V0_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V0_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ] +; CHECK-NEXT: [[V1_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V1_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ] +; CHECK-NEXT: [[ADD11:%.*]] = fadd fast double [[V1_0_LCSSA]], [[V0_0_LCSSA]] +; CHECK-NEXT: ret double [[ADD11]] +; +entry: + %nblocks.addr = alloca i32, align 4 + %RAND_BLOCK_LENGTH.addr = alloca i32, align 4 + %samples.addr = alloca ptr, align 8 + %Y.addr = alloca double, align 8 + %Z.addr = alloca double, align 8 + %i = alloca i32, align 4 + %block = alloca i32, align 4 + %rngVal = alloca double, align 8 + %callValue = alloca double, align 8 + %v0 = alloca double, align 8 + %v1 = alloca double, align 8 + store i32 %nblocks, ptr %nblocks.addr, align 4 + store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4 + store ptr %samples, ptr %samples.addr, align 8 + store double %Y, ptr %Y.addr, align 8 + store double %Z, ptr %Z.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #4 + call void @llvm.lifetime.start.p0(i64 4, ptr %block) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #4 + store double 0.000000e+00, ptr %v0, align 8 + call void 
@llvm.lifetime.start.p0(i64 8, ptr %v1) #4 + store double 0.000000e+00, ptr %v1, align 8 + store i32 0, ptr %block, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, ptr %block, align 4 + %1 = load i32, ptr %nblocks.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + %2 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %3 = load ptr, ptr %samples.addr, align 8 + call void @resample(i32 noundef %2, ptr noundef %3) + store i32 0, ptr %i, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %4 = load i32, ptr %i, align 4 + %5 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %cmp2 = icmp slt i32 %4, %5 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %6 = load ptr, ptr %samples.addr, align 8 + %7 = load i32, ptr %i, align 4 + %idxprom = sext i32 %7 to i64 + %arrayidx = getelementptr inbounds float, ptr %6, i64 %idxprom + %8 = load float, ptr %arrayidx, align 4 + %conv = fpext float %8 to double + store double %conv, ptr %rngVal, align 8 + %9 = load double, ptr %Y.addr, align 8 + %10 = load double, ptr %rngVal, align 8 + %11 = call fast double @llvm.exp2.f64(double %10) + %mul = fmul fast double %9, %11 + %12 = load double, ptr %Z.addr, align 8 + %sub = fsub fast double %mul, %12 + store double %sub, ptr %callValue, align 8 + %13 = load double, ptr %callValue, align 8 + %cmp4 = fcmp fast ogt double %13, 0.000000e+00 + br i1 %cmp4, label %if.then, label %if.end + +if.then: ; preds = %for.body3 + %14 = load double, ptr %callValue, align 8 + %15 = load double, ptr %v0, align 8 + %add = fadd fast double %15, %14 + store double %add, ptr %v0, align 8 + %16 = load double, ptr %callValue, align 8 + %17 = load double, ptr %callValue, align 8 + %mul6 = fmul fast double %16, %17 + %18 = load double, ptr %v1, align 8 + %add7 = fadd fast double %18, %mul6 + store double %add7, ptr %v1, align 8 + br label %if.end 
+ +if.end: ; preds = %if.then, %for.body3 + br label %for.inc + +for.inc: ; preds = %if.end + %19 = load i32, ptr %i, align 4 + %inc = add nsw i32 %19, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %20 = load i32, ptr %block, align 4 + %inc9 = add nsw i32 %20, 1 + store i32 %inc9, ptr %block, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %21 = load double, ptr %v0, align 8 + %22 = load double, ptr %v1, align 8 + %add11 = fadd fast double %21, %22 + call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #4 + call void @llvm.lifetime.end.p0(i64 4, ptr %block) #4 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #4 + ret double %add11 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @resample(i32 noundef, ptr noundef) +declare double @llvm.exp2.f64(double) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) From 9f6c632ecda08bfff76b798c46d5d7cfde57b5e9 Mon Sep 17 00:00:00 2001 From: Andrea Faulds Date: Fri, 25 Oct 2024 16:21:59 +0200 Subject: [PATCH 03/39] [mlir][mlir-spirv-cpu-runner] Move MLIR pass pipeline to mlir-opt (#113594) Adds a new mlir-opt test-only pass, -test-spirv-cpu-runner-pipeline, which runs the set of MLIR passes needed for the mlir-spirv-cpu-runner, and removes them from the runner. The tests are changed to invoke mlir-opt with this flag before running the runner. The eventual goal is to move all host/device code generation steps out of the runner, like with some of the other runners. Recommit of 17e9752267ed9c81c8da87f3a6d0e01f130b0d04. It was reverted due to a build failure, but the build failure had in fact already been fixed in e7302319b52e3d231216d54d10622b0698928a96. 
--- mlir/test/lib/Pass/CMakeLists.txt | 1 + .../lib/Pass/TestSPIRVCPURunnerPipeline.cpp | 47 +++++++++++++++++++ mlir/test/mlir-spirv-cpu-runner/double.mlir | 3 +- .../mlir-spirv-cpu-runner/simple_add.mlir | 3 +- mlir/tools/mlir-opt/mlir-opt.cpp | 2 + .../mlir-spirv-cpu-runner.cpp | 24 ---------- 6 files changed, 54 insertions(+), 26 deletions(-) create mode 100644 mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt index b190f054e50bd1c..f489b7e51e5038a 100644 --- a/mlir/test/lib/Pass/CMakeLists.txt +++ b/mlir/test/lib/Pass/CMakeLists.txt @@ -3,6 +3,7 @@ get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) add_mlir_library(MLIRTestPass TestDynamicPipeline.cpp TestPassManager.cpp + TestSPIRVCPURunnerPipeline.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp new file mode 100644 index 000000000000000..ded0d22c31307e9 --- /dev/null +++ b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp @@ -0,0 +1,47 @@ +//===------------------ TestSPIRVCPURunnerPipeline.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements a pipeline for use by mlir-spirv-cpu-runner tests. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" +#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" +#include "mlir/Dialect/SPIRV/Transforms/Passes.h" +#include "mlir/Pass/PassManager.h" + +using namespace mlir; + +namespace { + +void buildTestSPIRVCPURunnerPipeline(OpPassManager &passManager) { + passManager.addPass(createGpuKernelOutliningPass()); + passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true)); + + OpPassManager &nestedPM = passManager.nest<spirv::ModuleOp>(); + nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass()); + nestedPM.addPass(spirv::createSPIRVUpdateVCEPass()); + passManager.addPass(createLowerHostCodeToLLVMPass()); + passManager.addPass(createConvertSPIRVToLLVMPass()); +} + +} // namespace + +namespace mlir { +namespace test { +void registerTestSPIRVCPURunnerPipeline() { + PassPipelineRegistration<>( + "test-spirv-cpu-runner-pipeline", + "Runs a series of passes for lowering SPIR-V-dialect MLIR to " + "LLVM-dialect MLIR intended for mlir-spirv-cpu-runner.", + buildTestSPIRVCPURunnerPipeline); +} +} // namespace test +} // namespace mlir diff --git a/mlir/test/mlir-spirv-cpu-runner/double.mlir b/mlir/test/mlir-spirv-cpu-runner/double.mlir index cd551ffb1bd0623..35557ba1e94c003 100644 --- a/mlir/test/mlir-spirv-cpu-runner/double.mlir +++ b/mlir/test/mlir-spirv-cpu-runner/double.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ +// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \ +// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ // RUN: | FileCheck %s // CHECK: [8, 8, 8, 8, 8, 8] diff --git a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir 
b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir index 119e973e45e4a7b..75675a69a675833 100644 --- a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir +++ b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ +// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \ +// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ // RUN: | FileCheck %s // CHECK: data = diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 36b142484bb04a6..002c3900056dee1 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -142,6 +142,7 @@ void registerTestSCFWhileOpBuilderPass(); void registerTestSCFWrapInZeroTripCheckPasses(); void registerTestShapeMappingPass(); void registerTestSliceAnalysisPass(); +void registerTestSPIRVCPURunnerPipeline(); void registerTestSPIRVFuncSignatureConversion(); void registerTestSPIRVVectorUnrolling(); void registerTestTensorCopyInsertionPass(); @@ -278,6 +279,7 @@ void registerTestPasses() { mlir::test::registerTestSCFWrapInZeroTripCheckPasses(); mlir::test::registerTestShapeMappingPass(); mlir::test::registerTestSliceAnalysisPass(); + mlir::test::registerTestSPIRVCPURunnerPipeline(); mlir::test::registerTestSPIRVFuncSignatureConversion(); mlir::test::registerTestSPIRVVectorUnrolling(); mlir::test::registerTestTensorCopyInsertionPass(); diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp index 7e0b51cac806213..22ad1024db4a0b6 100644 --- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp +++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp @@ -12,18 +12,12 @@ // //===----------------------------------------------------------------------===// -#include 
"mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" -#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" -#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" -#include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" -#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" -#include "mlir/Dialect/SPIRV/Transforms/Passes.h" #include "mlir/ExecutionEngine/JitRunner.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" @@ -75,23 +69,6 @@ convertMLIRModule(Operation *op, llvm::LLVMContext &context) { return mainModule; } -static LogicalResult runMLIRPasses(Operation *module, - JitRunnerOptions &options) { - PassManager passManager(module->getContext(), - module->getName().getStringRef()); - if (failed(applyPassManagerCLOptions(passManager))) - return failure(); - passManager.addPass(createGpuKernelOutliningPass()); - passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true)); - - OpPassManager &nestedPM = passManager.nest(); - nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass()); - nestedPM.addPass(spirv::createSPIRVUpdateVCEPass()); - passManager.addPass(createLowerHostCodeToLLVMPass()); - passManager.addPass(createConvertSPIRVToLLVMPass()); - return passManager.run(module); -} - int main(int argc, char **argv) { llvm::InitLLVM y(argc, argv); @@ -99,7 +76,6 @@ int main(int argc, char **argv) { llvm::InitializeNativeTargetAsmPrinter(); mlir::JitRunnerConfig jitRunnerConfig; - jitRunnerConfig.mlirTransformer = runMLIRPasses; jitRunnerConfig.llvmModuleBuilder = convertMLIRModule; mlir::DialectRegistry registry; From cbdfb18794026b0d662d7de1fa39c02ad6227abb Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Fri, 25 Oct 2024 15:39:07 +0100 Subject: [PATCH 04/39] [RISCV] Add Supm 
extension to RVA23 profiles (#113619) This is mandatory for both RVA23U64 and RVA23S64 in the ratified version of the specification . --- llvm/lib/Target/RISCV/RISCVProfiles.td | 3 ++- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index 157e087a64da07b..ce7d1973989fc13 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -73,7 +73,8 @@ defvar RVA23U64Features = !listconcat(RVA22U64Features, FeatureStdExtZcmop, FeatureStdExtZcb, FeatureStdExtZfa, - FeatureStdExtZawrs]); + FeatureStdExtZawrs, + FeatureStdExtSupm]); defvar RVA23S64BaseFeatures = !listconcat(RVA22S64BaseFeatures, [FeatureStdExtSvnapot, diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index e9743d484f776f0..9be9ddd05ee2900 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -578,8 +578,8 @@ ; RVA20S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zmmul1p0_za128rs1p0_ssccptr1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0" ; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0" -; RVA23U64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" +; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0" +; RVA23S64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVB23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVM23U32: .attribute 5, "rv32i2p1_m2p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0" From bbc0e631d2d3facd5952aeafc7400761813acc3a Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Fri, 25 Oct 2024 15:41:39 +0100 Subject: [PATCH 05/39] [MLIR] Remove unneeded LLVMDialect.h include in ControlFlowToSCF.cpp (#113560) This fixes the following failure when doing a clean build (in particular no .ninja* lying around) of lib/libMLIRControlFlowToSCF.a only: ``` In file 
included from llvm/include/llvm/IR/Module.h:22, from mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h:37, from mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp:19 llvm/include/llvm/IR/Attributes.h:90:14: fatal error: llvm/IR/Attributes.inc: No such file or directory ``` --- mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp index d3ee89743da9db5..1c592d665f3e4c5 100644 --- a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp +++ b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp @@ -16,7 +16,6 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/UB/IR/UBOps.h" #include "mlir/Pass/Pass.h" From e47bf3d08d51306f2e534951a1b77043dc540ceb Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Wed, 23 Oct 2024 16:13:39 -0400 Subject: [PATCH 06/39] [JIT] Fix crash in unit tests The unit tests `ReOptimizeLayerTest.BasicReOptimization` and `JITLinkRedirectionManagerTest.BasicRedirectionOperation` are failing for me with the error: ``` Program aborted due to an unhandled Error: Error value was Success. (Note: Success values must still be checked prior to being destroyed). ``` The error is raised when a value is assigned to `Err`, due to the missing `ErrorAsOutParameter`. The fix is to move the error handling out of the constructor.
--- .../Orc/JITLinkRedirectableSymbolManager.h | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h index 52f284c89bdade5..ef42cc5f798fd93 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h @@ -26,12 +26,16 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, /// Create redirection manager that uses JITLink based implementaion. static Expected> Create(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD) { - Error Err = Error::success(); - auto RM = std::unique_ptr( - new JITLinkRedirectableSymbolManager(ObjLinkingLayer, JD, Err)); - if (Err) - return Err; - return std::move(RM); + auto AnonymousPtrCreator(jitlink::getAnonymousPointerCreator( + ObjLinkingLayer.getExecutionSession().getTargetTriple())); + auto PtrJumpStubCreator(jitlink::getPointerJumpStubCreator( + ObjLinkingLayer.getExecutionSession().getTargetTriple())); + if (!AnonymousPtrCreator || !PtrJumpStubCreator) + return make_error("Architecture not supported", + inconvertibleErrorCode()); + return std::unique_ptr( + new JITLinkRedirectableSymbolManager( + ObjLinkingLayer, JD, AnonymousPtrCreator, PtrJumpStubCreator)); } void emitRedirectableSymbols(std::unique_ptr R, @@ -52,18 +56,13 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, constexpr static StringRef JumpStubTableName = "$IND_JUMP_"; constexpr static StringRef StubPtrTableName = "$__IND_JUMP_PTRS"; - JITLinkRedirectableSymbolManager(ObjectLinkingLayer &ObjLinkingLayer, - JITDylib &JD, Error &Err) + JITLinkRedirectableSymbolManager( + ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD, + jitlink::AnonymousPointerCreator &AnonymousPtrCreator, + jitlink::PointerJumpStubCreator &PtrJumpStubCreator) : 
ObjLinkingLayer(ObjLinkingLayer), JD(JD), - AnonymousPtrCreator(jitlink::getAnonymousPointerCreator( - ObjLinkingLayer.getExecutionSession().getTargetTriple())), - PtrJumpStubCreator(jitlink::getPointerJumpStubCreator( - ObjLinkingLayer.getExecutionSession().getTargetTriple())) { - if (!AnonymousPtrCreator || !PtrJumpStubCreator) - Err = make_error("Architecture not supported", - inconvertibleErrorCode()); - if (Err) - return; + AnonymousPtrCreator(std::move(AnonymousPtrCreator)), + PtrJumpStubCreator(std::move(PtrJumpStubCreator)) { ObjLinkingLayer.getExecutionSession().registerResourceManager(*this); } From aba39c3974c7e43a83a9d647dca9b67caca8572e Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 25 Oct 2024 17:40:00 +0200 Subject: [PATCH 07/39] [System] Precommit of test for #112491 (#113704) --- .../SystemZ/vec-elt-insertion.ll | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll new file mode 100644 index 000000000000000..eb8dd72e0304d91 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll @@ -0,0 +1,128 @@ +; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z16 -S -passes=slp-vectorizer \ +; RUN: -pass-remarks-output=%t | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=REMARK %s +; +; NB! This is a pre-commit version (for #112491) with current codegen and remarks. +; +; Test functions that (at least currently) only gets vectorized if the +; insertion cost for an element load is counted as free. + +; This function needs the free element load to be recognized in SLP +; getGatherCost(). 
+define void @fun0(ptr nocapture %0, double %1) { +; CHECK-LABEL: define void @fun0( +; CHECK: fmul double +; CHECK: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.sqrt.f64( +; CHECK: fmul double +; CHECK: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.sqrt.f64( +; +; REMARK-LABEL: Function: fun0 +; REMARK: Args: +; REMARK-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' +; REMARK-NEXT: - Cost: '0' + + %3 = fmul double %1, 2.000000e+00 + %4 = tail call double @llvm.fmuladd.f64(double %3, double %3, double 0.000000e+00) + %5 = tail call double @llvm.fmuladd.f64(double %3, double %3, double %4) + %sqrt1 = tail call double @llvm.sqrt.f64(double %5) + %6 = load double, ptr %0, align 8 + %7 = fmul double %6, 2.000000e+00 + %8 = tail call double @llvm.fmuladd.f64(double %7, double %7, double 0.000000e+00) + %9 = tail call double @llvm.fmuladd.f64(double %7, double %7, double %8) + %sqrt = tail call double @llvm.sqrt.f64(double %9) + %10 = fadd double %sqrt1, %sqrt + store double %10, ptr %0, align 8 + ret void +} + +; This function needs the element-load to be recognized in SystemZ +; getVectorInstrCost(). 
+define void @fun1(double %0) { +; CHECK-LABEL: define void @fun1( +; CHECK: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fmul double +; CHECK-NEXT: fmul double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK: fcmp olt double +; CHECK-NEXT: fcmp olt double +; CHECK-NEXT: or i1 +; +; REMARK-LABEL: Function: fun1 +; REMARK: Args: +; REMARK: - String: 'List vectorization was possible but not beneficial with cost ' +; REMARK-NEXT: - Cost: '0' + + br label %2 + +2: + %3 = phi double [ poison, %1 ], [ poison, %2 ] + %4 = phi double [ undef, %1 ], [ poison, %2 ] + %5 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %6 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %7 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %8 = phi double [ 0.000000e+00, %1 ], [ %21, %2 ] + %9 = fsub double 0.000000e+00, %8 + %10 = fsub double 0.000000e+00, %7 + %11 = fmul double %9, 0.000000e+00 + %12 = fmul double %10, 0.000000e+00 + %13 = fsub double 0.000000e+00, %6 + %14 = fsub double 0.000000e+00, %5 + %15 = tail call double @llvm.fmuladd.f64(double %13, double %13, double %11) + %16 = tail call double @llvm.fmuladd.f64(double %14, double %14, double %12) + %17 = fsub double 0.000000e+00, %4 + %18 = fsub double 0.000000e+00, %3 + %19 = tail call double @llvm.fmuladd.f64(double %17, double %17, double %15) + %20 = tail call double @llvm.fmuladd.f64(double %18, double %18, double %16) + %21 = load double, ptr null, align 8 + %22 = fcmp olt double %19, %0 + %23 = fcmp olt double %20, 0.000000e+00 + %24 = or i1 %23, %22 + br label %2 +} + +declare double 
@llvm.fmuladd.f64(double, double, double) + +; This should *not* be vectorized as the insertion into the vector isn't free, +; which is recognized in SystemZTTImpl::getScalarizationOverhead(). +define void @fun2(ptr %0, ptr %Dst) { +; CHECK-LABEL: define void @fun2( +; CHECK: insertelement +; CHECK: store <2 x i64> +; +; REMARK-LABEL: Function: fun2 +; REMARK: Args: +; REMARK-NEXT: - String: 'Stores SLP vectorized with cost ' +; REMARK-NEXT: - Cost: '-1' + + %3 = load i64, ptr %0, align 8 + %4 = icmp eq i64 %3, 0 + br i1 %4, label %5, label %6 + +5: + ret void + +6: + %7 = getelementptr i8, ptr %Dst, i64 24 + store i64 %3, ptr %7, align 8 + %8 = getelementptr i8, ptr %Dst, i64 16 + store i64 0, ptr %8, align 8 + br label %5 +} From 81e536ec87a108d012cf9156a2c3fc672fb92155 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 25 Oct 2024 15:43:47 +0000 Subject: [PATCH 08/39] [clang][test] Fix typo in arm-mfp8.cpp New test added by https://github.com/llvm/llvm-project/pull/97277. --- clang/test/AST/arm-mfp8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/AST/arm-mfp8.cpp b/clang/test/AST/arm-mfp8.cpp index a00d055f7d96794..51bebba067eb9f6 100644 --- a/clang/test/AST/arm-mfp8.cpp +++ b/clang/test/AST/arm-mfp8.cpp @@ -69,7 +69,7 @@ class C1 { //CHECK-NEXT: | | `-CompoundStmt {{.*}} //CHECK-NEXT: | | `-ReturnStmt {{.*}} //CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' -//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}}8 'arg' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t' //CHECK-NEXT: | `-CXXMethodDecl {{.*}} func2c '__mfp8 (__mfp8)' static implicit-inline //CHECK-NEXT: | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t' //CHECK-NEXT: | `-CompoundStmt {{.*}} From 5c20891b2bb60f82dd82a8e90b111f8c13a13ad4 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 25 Oct 2024 08:52:56 -0700 Subject: 
[PATCH 09/39] [WebKit Checkers] Allow a guardian CheckedPtr/CheckedRef (#110222) This PR makes WebKit checkers allow a guardian variable which is CheckedPtr or CheckedRef in addition to RefPtr or Ref. --- .../Checkers/WebKit/ASTUtils.cpp | 16 +++--- .../Checkers/WebKit/PtrTypesSemantics.cpp | 43 +++++++++++++--- .../Checkers/WebKit/PtrTypesSemantics.h | 22 ++++++-- .../WebKit/UncountedCallArgsChecker.cpp | 2 + .../WebKit/UncountedLocalVarsChecker.cpp | 1 + .../Checkers/WebKit/call-args-checked.cpp | 46 +++++++++++++++++ .../Analysis/Checkers/WebKit/mock-types.h | 16 ++++-- .../Checkers/WebKit/uncounted-local-vars.cpp | 51 +++++++++++++++++++ 8 files changed, 177 insertions(+), 20 deletions(-) create mode 100644 clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index b7b2f8a16f07b31..9d34dfd3cea636b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -17,6 +17,10 @@ namespace clang { +bool isSafePtr(clang::CXXRecordDecl *Decl) { + return isRefCounted(Decl) || isCheckedPtr(Decl); +} + bool tryToFindPtrOrigin( const Expr *E, bool StopAtFirstRefCountedObj, std::function callback) { @@ -31,7 +35,7 @@ bool tryToFindPtrOrigin( } if (auto *tempExpr = dyn_cast(E)) { if (auto *C = tempExpr->getConstructor()) { - if (auto *Class = C->getParent(); Class && isRefCounted(Class)) + if (auto *Class = C->getParent(); Class && isSafePtr(Class)) return callback(E, true); break; } @@ -56,7 +60,7 @@ bool tryToFindPtrOrigin( if (StopAtFirstRefCountedObj) { if (auto *ConversionFunc = dyn_cast_or_null(cast->getConversionFunction())) { - if (isCtorOfRefCounted(ConversionFunc)) + if (isCtorOfSafePtr(ConversionFunc)) return callback(E, true); } } @@ -68,7 +72,7 @@ bool tryToFindPtrOrigin( if (auto *call = dyn_cast(E)) { if (auto *memberCall = dyn_cast(call)) { if (auto *decl = 
memberCall->getMethodDecl()) { - std::optional IsGetterOfRefCt = isGetterOfRefCounted(decl); + std::optional IsGetterOfRefCt = isGetterOfSafePtr(decl); if (IsGetterOfRefCt && *IsGetterOfRefCt) { E = memberCall->getImplicitObjectArgument(); if (StopAtFirstRefCountedObj) { @@ -87,7 +91,7 @@ bool tryToFindPtrOrigin( } if (auto *callee = call->getDirectCallee()) { - if (isCtorOfRefCounted(callee)) { + if (isCtorOfRefCounted(callee) || isCtorOfCheckedPtr(callee)) { if (StopAtFirstRefCountedObj) return callback(E, true); @@ -95,7 +99,7 @@ bool tryToFindPtrOrigin( continue; } - if (isRefType(callee->getReturnType())) + if (isSafePtrType(callee->getReturnType())) return callback(E, true); if (isSingleton(callee)) @@ -114,7 +118,7 @@ bool tryToFindPtrOrigin( } if (auto *ObjCMsgExpr = dyn_cast(E)) { if (auto *Method = ObjCMsgExpr->getMethodDecl()) { - if (isRefType(Method->getReturnType())) + if (isSafePtrType(Method->getReturnType())) return callback(E, true); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 71440e6d08a1c9a..2293dcf1d4bd643 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -135,7 +135,16 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) { || FunctionName == "Identifier"; } -bool isRefType(const clang::QualType T) { +bool isCtorOfCheckedPtr(const clang::FunctionDecl *F) { + assert(F); + return isCheckedPtr(safeGetName(F)); +} + +bool isCtorOfSafePtr(const clang::FunctionDecl *F) { + return isCtorOfRefCounted(F) || isCtorOfCheckedPtr(F); +} + +bool isSafePtrType(const clang::QualType T) { QualType type = T; while (!type.isNull()) { if (auto *elaboratedT = type->getAs()) { @@ -145,7 +154,7 @@ bool isRefType(const clang::QualType T) { if (auto *specialT = type->getAs()) { if (auto *decl = specialT->getTemplateName().getAsTemplateDecl()) { auto name = 
decl->getNameAsString(); - return isRefType(name); + return isRefType(name) || isCheckedPtr(name); } return false; } @@ -177,6 +186,12 @@ std::optional isUncounted(const CXXRecordDecl* Class) return (*IsRefCountable); } +std::optional isUnchecked(const CXXRecordDecl *Class) { + if (isCheckedPtr(Class)) + return false; // Cheaper than below + return isCheckedPtrCapable(Class); +} + std::optional isUncountedPtr(const QualType T) { if (T->isPointerType() || T->isReferenceType()) { if (auto *CXXRD = T->getPointeeCXXRecordDecl()) @@ -185,8 +200,16 @@ std::optional isUncountedPtr(const QualType T) { return false; } -std::optional isGetterOfRefCounted(const CXXMethodDecl* M) -{ +std::optional isUnsafePtr(const QualType T) { + if (T->isPointerType() || T->isReferenceType()) { + if (auto *CXXRD = T->getPointeeCXXRecordDecl()) { + return isUncounted(CXXRD) || isUnchecked(CXXRD); + } + } + return false; +} + +std::optional isGetterOfSafePtr(const CXXMethodDecl *M) { assert(M); if (isa(M)) { @@ -194,6 +217,9 @@ std::optional isGetterOfRefCounted(const CXXMethodDecl* M) auto className = safeGetName(calleeMethodsClass); auto method = safeGetName(M); + if (isCheckedPtr(className) && (method == "get" || method == "ptr")) + return true; + if ((isRefType(className) && (method == "get" || method == "ptr")) || ((className == "String" || className == "AtomString" || className == "AtomStringImpl" || className == "UniqueString" || @@ -205,7 +231,12 @@ std::optional isGetterOfRefCounted(const CXXMethodDecl* M) // FIXME: Currently allowing any Ref -> whatever cast. 
if (isRefType(className)) { if (auto *maybeRefToRawOperator = dyn_cast(M)) - return isUncountedPtr(maybeRefToRawOperator->getConversionType()); + return isUnsafePtr(maybeRefToRawOperator->getConversionType()); + } + + if (isCheckedPtr(className)) { + if (auto *maybeRefToRawOperator = dyn_cast(M)) + return isUnsafePtr(maybeRefToRawOperator->getConversionType()); } } return false; @@ -448,7 +479,7 @@ class TrivialFunctionAnalysisVisitor if (!Callee) return false; - std::optional IsGetterOfRefCounted = isGetterOfRefCounted(Callee); + std::optional IsGetterOfRefCounted = isGetterOfSafePtr(Callee); if (IsGetterOfRefCounted && *IsGetterOfRefCounted) return true; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index 8e6aadf63b6d679..4b41ca96e1df1d3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -63,18 +63,30 @@ std::optional isUncounted(const clang::CXXRecordDecl* Class); /// class, false if not, std::nullopt if inconclusive. std::optional isUncountedPtr(const clang::QualType T); -/// \returns true if Name is a RefPtr, Ref, or its variant, false if not. -bool isRefType(const std::string &Name); +/// \returns true if \p T is a RefPtr, Ref, CheckedPtr, CheckedRef, or its +/// variant, false if not. +bool isSafePtrType(const clang::QualType T); /// \returns true if \p F creates ref-countable object from uncounted parameter, /// false if not. bool isCtorOfRefCounted(const clang::FunctionDecl *F); -/// \returns true if \p T is RefPtr, Ref, or its variant, false if not. -bool isRefType(const clang::QualType T); +/// \returns true if \p F creates checked ptr object from uncounted parameter, +/// false if not. +bool isCtorOfCheckedPtr(const clang::FunctionDecl *F); + +/// \returns true if \p F creates ref-countable or checked ptr object from +/// uncounted parameter, false if not. 
+bool isCtorOfSafePtr(const clang::FunctionDecl *F); + +/// \returns true if \p Name is RefPtr, Ref, or its variant, false if not. +bool isRefType(const std::string &Name); + +/// \returns true if \p Name is CheckedRef or CheckedPtr, false if not. +bool isCheckedPtr(const std::string &Name); /// \returns true if \p M is getter of a ref-counted class, false if not. -std::optional isGetterOfRefCounted(const clang::CXXMethodDecl* Method); +std::optional isGetterOfSafePtr(const clang::CXXMethodDecl *Method); /// \returns true if \p F is a conversion between ref-countable or ref-counted /// pointer types. diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index cea3503fa2c314d..1a5a7309a54f167 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -96,6 +96,8 @@ class UncountedCallArgsChecker auto name = safeGetName(MD); if (name == "ref" || name == "deref") return; + if (name == "incrementPtrCount" || name == "decrementPtrCount") + return; } auto *E = MemberCallExpr->getImplicitObjectArgument(); QualType ArgType = MemberCallExpr->getObjectType().getCanonicalType(); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 81d21100de878db..5cdf047738abcb2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -227,6 +227,7 @@ class UncountedLocalVarsChecker if (MaybeGuardianArgCXXRecord) { if (MaybeGuardian->isLocalVarDecl() && (isRefCounted(MaybeGuardianArgCXXRecord) || + isCheckedPtr(MaybeGuardianArgCXXRecord) || isRefcountedStringsHack(MaybeGuardian)) && isGuardedScopeEmbeddedInGuardianScope( V, MaybeGuardian)) diff --git 
a/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp new file mode 100644 index 000000000000000..49b6bfcd7cadfdc --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s + +#include "mock-types.h" + +RefCountableAndCheckable* makeObj(); +CheckedRef makeObjChecked(); +void someFunction(RefCountableAndCheckable*); + +namespace call_args_unchecked_uncounted { + +static void foo() { + someFunction(makeObj()); + // expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}} +} + +} // namespace call_args_checked + +namespace call_args_checked { + +static void foo() { + CheckedPtr ptr = makeObj(); + someFunction(ptr.get()); +} + +static void bar() { + someFunction(CheckedPtr { makeObj() }.get()); +} + +static void baz() { + someFunction(makeObjChecked().ptr()); +} + +} // namespace call_args_checked + +namespace call_args_default { + +void someFunction(RefCountableAndCheckable* = makeObj()); +// expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}} +void otherFunction(RefCountableAndCheckable* = makeObjChecked().ptr()); + +void foo() { + someFunction(); + otherFunction(); +} + +} diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index 933b4c5e62a79cc..8d8a90f0afae0e1 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -114,8 +114,8 @@ template struct CheckedRef { public: CheckedRef() : t{} {}; - CheckedRef(T &t) : t(t) { t->incrementPtrCount(); } - CheckedRef(const CheckedRef& o) : t(o.t) { if (t) t->incrementPtrCount(); } + CheckedRef(T &t) : t(&t) { t.incrementPtrCount(); } + CheckedRef(const CheckedRef &o) : t(o.t) { if (t) t->incrementPtrCount(); } 
~CheckedRef() { if (t) t->decrementPtrCount(); } T &get() { return *t; } T *ptr() { return t; } @@ -135,7 +135,7 @@ template struct CheckedPtr { if (t) t->incrementPtrCount(); } - CheckedPtr(Ref&& o) + CheckedPtr(Ref &&o) : t(o.leakRef()) { } ~CheckedPtr() { @@ -156,4 +156,14 @@ class CheckedObj { void decrementPtrCount(); }; +class RefCountableAndCheckable { +public: + void incrementPtrCount() const; + void decrementPtrCount() const; + void ref() const; + void deref() const; + void method(); + int trivial() { return 0; } +}; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index b5f6b8535bf4181..1c0df42cdda663c 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -290,6 +290,57 @@ void foo() { } // namespace local_assignment_to_global +namespace local_refcountable_checkable_object { + +RefCountableAndCheckable* provide_obj(); + +void local_raw_ptr() { + RefCountableAndCheckable* a = nullptr; + // expected-warning@-1{{Local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a = provide_obj(); + a->method(); +} + +void local_checked_ptr() { + CheckedPtr a = nullptr; + a = provide_obj(); + a->method(); +} + +void local_var_with_guardian_checked_ptr() { + CheckedPtr a = provide_obj(); + { + auto* b = a.get(); + b->method(); + } +} + +void local_var_with_guardian_checked_ptr_with_assignment() { + CheckedPtr a = provide_obj(); + { + RefCountableAndCheckable* b = a.get(); + // expected-warning@-1{{Local variable 'b' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + b = provide_obj(); + b->method(); + } +} + +void local_var_with_guardian_checked_ref() { + CheckedRef a = *provide_obj(); + { + RefCountableAndCheckable& b = a; + b.method(); + } +} + +void static_var() { + static RefCountableAndCheckable* a = nullptr; + // 
expected-warning@-1{{Static local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a = provide_obj(); +} + +} // namespace local_refcountable_checkable_object + namespace local_var_in_recursive_function { struct TreeNode { From 1f2b7ae6d78906df4f0c06961e3c9ed227986acf Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 12:28:55 -0400 Subject: [PATCH 10/39] [libc++] Refactor locale_guard (#113694) Rename __libcpp_locale_guard to just __locale_guard, since there's no reason for it to have __libcpp_ in its name -- it's just an internal utility. Also, define __locale_guard unconditionally of _LIBCPP_LOCALE__L_EXTENSIONS, since that header is only used on Windows (where it has a custom definition) or from bsd_locale_fallbacks.h, which is only included when the L extensions are not provided. --- libcxx/include/CMakeLists.txt | 2 +- .../locale_base_api/bsd_locale_fallbacks.h | 30 ++++++------- .../{locale_base_api => }/locale_guard.h | 42 +++++++++---------- libcxx/include/module.modulemap | 2 +- libcxx/src/iostream.cpp | 4 +- libcxx/src/support/win32/locale_win32.cpp | 34 +++++++-------- 6 files changed, 56 insertions(+), 58 deletions(-) rename libcxx/include/__locale_dir/{locale_base_api => }/locale_guard.h (73%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 975adc03ec81da0..63aa74e09bb1a27 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -496,11 +496,11 @@ set(files __locale_dir/locale_base_api/bsd_locale_fallbacks.h __locale_dir/locale_base_api/fuchsia.h __locale_dir/locale_base_api/ibm.h - __locale_dir/locale_base_api/locale_guard.h __locale_dir/locale_base_api/musl.h __locale_dir/locale_base_api/newlib.h __locale_dir/locale_base_api/openbsd.h __locale_dir/locale_base_api/win32.h + __locale_dir/locale_guard.h __math/abs.h __math/copysign.h __math/error_functions.h diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h 
b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index 5f99c7aea02a96a..ae2db6ae70bebcb 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -13,7 +13,7 @@ #ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H #define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#include <__locale_dir/locale_base_api/locale_guard.h> +#include <__locale_dir/locale_guard.h> #include #include #include @@ -29,64 +29,64 @@ _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return MB_CUR_MAX; } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __libcpp_btowc_l(int __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return btowc(__c); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_wctob_l(wint_t __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wctob(__c); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcsnrtombs_l(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wcsnrtombs(__dest, __src, __nwc, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcrtomb_l(char* __s, wchar_t __wc, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wcrtomb(__s, __wc, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsnrtowcs_l(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsnrtowcs(__dest, __src, __nms, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t 
__libcpp_mbrtowc_l(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrtowc(__pwc, __s, __n, __ps); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mbtowc_l(wchar_t* __pwc, const char* __pmb, size_t __max, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbtowc(__pwc, __pmb, __max); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbrlen_l(const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrlen(__s, __n, __ps); } #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI lconv* __libcpp_localeconv_l(locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return localeconv(); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsrtowcs_l(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsrtowcs(__dest, __src, __len, __ps); } #endif @@ -95,7 +95,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __libcpp_snprintf_l( char* __s, size_t __n, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsnprintf(__s, __n, __format, __va); va_end(__va); return __res; @@ -105,7 +105,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l( char** __s, locale_t __l, const char* __format, ...) 
{ va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vasprintf(__s, __format, __va); va_end(__va); return __res; @@ -115,7 +115,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( const char* __s, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsscanf(__s, __format, __va); va_end(__va); return __res; diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_guard.h similarity index 73% rename from libcxx/include/__locale_dir/locale_base_api/locale_guard.h rename to libcxx/include/__locale_dir/locale_guard.h index 7d15f2d253adc39..e0c414c001c41f1 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_guard.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H -#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H #include <__config> #include <__locale> // for locale_t @@ -19,23 +19,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_LOCALE__L_EXTENSIONS) -struct __libcpp_locale_guard { - _LIBCPP_HIDE_FROM_ABI __libcpp_locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {} - - _LIBCPP_HIDE_FROM_ABI ~__libcpp_locale_guard() { - if (__old_loc_) - uselocale(__old_loc_); - } - - locale_t __old_loc_; - - __libcpp_locale_guard(__libcpp_locale_guard const&) = delete; - __libcpp_locale_guard& operator=(__libcpp_locale_guard const&) = delete; -}; -#elif defined(_LIBCPP_MSVCRT_LIKE) -struct __libcpp_locale_guard { - __libcpp_locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { +#if 
defined(_LIBCPP_MSVCRT_LIKE) +struct __locale_guard { + __locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { // Setting the locale can be expensive even when the locale given is // already the current locale, so do an explicit check to see if the // current locale is already the one we want. @@ -51,7 +37,7 @@ struct __libcpp_locale_guard { __setlocale(__l.__get_locale()); } } - ~__libcpp_locale_guard() { + ~__locale_guard() { // The CRT documentation doesn't explicitly say, but setlocale() does the // right thing when given a semicolon-separated list of locale settings // for the different categories in the same format as returned by @@ -71,8 +57,22 @@ struct __libcpp_locale_guard { int __status; char* __locale_all = nullptr; }; +#else +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; #endif _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index b429d7cff702b81..c79070c318759db 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1443,7 +1443,7 @@ module std [system] { module locale { header "locale" header "__locale_dir/locale_base_api.h" - header "__locale_dir/locale_base_api/locale_guard.h" + header "__locale_dir/locale_guard.h" module locale_base_api { textual header "__locale_dir/locale_base_api/android.h" textual header "__locale_dir/locale_base_api/bsd_locale_defaults.h" diff --git a/libcxx/src/iostream.cpp b/libcxx/src/iostream.cpp index c5ad77a01916084..48d2fdb866a332c 100644 --- a/libcxx/src/iostream.cpp +++ 
b/libcxx/src/iostream.cpp @@ -12,7 +12,7 @@ #include #ifdef _LIBCPP_MSVCRT_LIKE -# include <__locale_dir/locale_base_api/locale_guard.h> +# include <__locale_dir/locale_guard.h> #endif #define _str(s) #s @@ -109,7 +109,7 @@ static void force_locale_initialization() { static bool once = []() { auto loc = newlocale(LC_ALL_MASK, "C", 0); { - __libcpp_locale_guard g(loc); // forces initialization of locale TLS + __locale_guard g(loc); // forces initialization of locale TLS ((void)g); } freelocale(loc); diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp index 57ef94932ba0a76..2a08e97b8645b40 100644 --- a/libcxx/src/support/win32/locale_win32.cpp +++ b/libcxx/src/support/win32/locale_win32.cpp @@ -11,12 +11,10 @@ #include #include -#include <__locale_dir/locale_base_api/locale_guard.h> +#include <__locale_dir/locale_guard.h> int __libcpp_vasprintf(char** sptr, const char* __restrict fmt, va_list ap); -using std::__libcpp_locale_guard; - // FIXME: base and mask currently unused. 
Needs manual work to construct the new locale locale_t newlocale(int /*mask*/, const char* locale, locale_t /*base*/) { return {_create_locale(LC_ALL, locale), locale}; @@ -26,33 +24,33 @@ decltype(MB_CUR_MAX) MB_CUR_MAX_L(locale_t __l) { #if defined(_LIBCPP_MSVCRT) return ___mb_cur_max_l_func(__l); #else - __libcpp_locale_guard __current(__l); + std::__locale_guard __current(__l); return MB_CUR_MAX; #endif } lconv* localeconv_l(locale_t& loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); lconv* lc = localeconv(); if (!lc) return lc; return loc.__store_lconv(lc); } size_t mbrlen_l(const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbrlen(s, n, ps); } size_t mbsrtowcs_l(wchar_t* __restrict dst, const char** __restrict src, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbsrtowcs(dst, src, len, ps); } size_t wcrtomb_l(char* __restrict s, wchar_t wc, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return wcrtomb(s, wc, ps); } size_t mbrtowc_l(wchar_t* __restrict pwc, const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbrtowc(pwc, s, n, ps); } size_t mbsnrtowcs_l(wchar_t* __restrict dst, @@ -61,7 +59,7 @@ size_t mbsnrtowcs_l(wchar_t* __restrict dst, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbsnrtowcs(dst, src, nms, len, ps); } size_t wcsnrtombs_l(char* __restrict dst, @@ -70,15 +68,15 @@ size_t wcsnrtombs_l(char* __restrict dst, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); 
return wcsnrtombs(dst, src, nwc, len, ps); } wint_t btowc_l(int c, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return btowc(c); } int wctob_l(wint_t c, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return wctob(c); } @@ -90,7 +88,7 @@ int snprintf_l(char* ret, size_t n, locale_t loc, const char* format, ...) { int result = __stdio_common_vsprintf( _CRT_INTERNAL_LOCAL_PRINTF_OPTIONS | _CRT_INTERNAL_PRINTF_STANDARD_SNPRINTF_BEHAVIOR, ret, n, format, loc, ap); #else - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") int result = vsnprintf(ret, n, format, ap); @@ -108,25 +106,25 @@ int asprintf_l(char** ret, locale_t loc, const char* format, ...) { return result; } int vasprintf_l(char** ret, locale_t loc, const char* format, va_list ap) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return __libcpp_vasprintf(ret, format, ap); } #if !defined(_LIBCPP_MSVCRT) float strtof_l(const char* nptr, char** endptr, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strtof(nptr, endptr); } long double strtold_l(const char* nptr, char** endptr, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strtold(nptr, endptr); } #endif #if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 size_t strftime_l(char* ret, size_t n, const char* format, const struct tm* tm, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strftime(ret, n, format, tm); } #endif From ba81e1949a4f25216e2b3ea3a1507a52db88562a Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 25 Oct 2024 17:32:25 +0100 Subject: [PATCH 11/39] [AArch64] Add assembly/disassembly for BFMOP4{A,S} (widening) instructions (#113203) The new 
instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 3 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../SME2p2/bfmop4as-widening-diagnostics.s | 220 ++++++++++++++++++ .../MC/AArch64/SME2p2/bfmop4as-widening.s | 178 ++++++++++++++ 4 files changed, 438 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 6044b5bb7d81511..b763aa15a7c3f15 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1004,6 +1004,9 @@ let Predicates = [HasSME2p2] in { def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">; def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">; def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">; + + defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">; + defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 08929ed5616b2c8..4cfe18eddf481cb 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5188,3 +5188,40 @@ class sme2_luti4_vector_vg4_strided sz, bits<2> op, string mnemonic> let Inst{3-2} = 0b00; let Inst{1-0} = Zd{1-0}; } + +class sme2_bf16_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001000; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + 
let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_bfmop4as_widening { + // Single vectors + def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s new file mode 100644 index 000000000000000..5906bcb07f15d5a --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s @@ -0,0 +1,220 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// BFMOP4A + +// Single vectors + +bfmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z17.h +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + 
+bfmop4a za0.s, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4a za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected 
za[0-3].s + +bfmop4s za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4s za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a 
multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching 
element types diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s new file mode 100644 index 000000000000000..40d08e503c8bb32 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// BFMOP4A + +// Single vectors + +bfmop4a za0.s, z0.h, z16.h // 10000001-00000000-00000000-00000000 +// CHECK-INST: bfmop4a za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x00,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000000 + +bfmop4a za3.s, z14.h, z30.h // 10000001-00001110-00000001-11000011 +// CHECK-INST: bfmop4a za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xc3,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e01c3 + +bfmop4a za1.s, z10.h, z20.h // 10000001-00000100-00000001-01000001 +// CHECK-INST: bfmop4a za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x41,0x01,0x04,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81040141 + +// Single and multiple vectors + +bfmop4a za0.s, z0.h, {z16.h-z17.h} // 
10000001-00010000-00000000-00000000 +// CHECK-INST: bfmop4a za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100000 + +bfmop4a za3.s, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11000011 +// CHECK-INST: bfmop4a za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e01c3 + +bfmop4a za2.s, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10000010 +// CHECK-INST: bfmop4a za2.s, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x82,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81180182 + +// Multiple and single vectors + +bfmop4a za0.s, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00000000 +// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x00,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000200 + +bfmop4a za3.s, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11000011 +// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc3,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e03c3 + +bfmop4a za2.s, {z12.h-z13.h}, z28.h // 10000001-00001100-00000011-10000010 +// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, z28.h +// CHECK-ENCODING: [0x82,0x03,0x0c,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810c0382 + +// Multiple vectors + +bfmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00000000 +// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100200 + +bfmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11000011 +// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x03,0x1e,0x81] +// CHECK-ERROR: instruction 
requires: sme2p2 +// CHECK-UNKNOWN: 811e03c3 + +bfmop4a za2.s, {z12.h-z13.h}, {z26.h-z27.h} // 10000001-00011010-00000011-10000010 +// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x82,0x03,0x1a,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811a0382 + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.s, z0.h, z16.h // 10000001-00000000-00000000-00010000 +// CHECK-INST: bfmop4s za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x10,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000010 + +bfmop4s za3.s, z14.h, z30.h // 10000001-00001110-00000001-11010011 +// CHECK-INST: bfmop4s za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xd3,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e01d3 + +bfmop4s za1.s, z10.h, z20.h // 10000001-00000100-00000001-01010001 +// CHECK-INST: bfmop4s za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x51,0x01,0x04,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81040151 + +// Single and multiple vectors + +bfmop4s za0.s, z0.h, {z16.h-z17.h} // 10000001-00010000-00000000-00010000 +// CHECK-INST: bfmop4s za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100010 + +bfmop4s za3.s, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11010011 +// CHECK-INST: bfmop4s za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e01d3 + +bfmop4s za2.s, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10010010 +// CHECK-INST: bfmop4s za2.s, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x92,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81180192 + +// Multiple and single vectors + +bfmop4s za0.s, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00010000 +// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: 
[0x10,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000210 + +bfmop4s za3.s, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11010011 +// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd3,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e03d3 + +bfmop4s za2.s, {z12.h-z13.h}, z28.h // 10000001-00001100-00000011-10010010 +// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, z28.h +// CHECK-ENCODING: [0x92,0x03,0x0c,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810c0392 + +// Multiple vectors + +bfmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00010000 +// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100210 + +bfmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11010011 +// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x03,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e03d3 + +bfmop4s za2.s, {z12.h-z13.h}, {z26.h-z27.h} // 10000001-00011010-00000011-10010010 +// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x92,0x03,0x1a,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811a0392 From 4161ca2092d3b92034515190f577aa200ec615bf Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 25 Oct 2024 14:54:56 +0100 Subject: [PATCH 12/39] [NFC][AArch64][LLVM] Update ReleaseNotes.md with Armv9.6-A (2024) arch extensions --- llvm/docs/ReleaseNotes.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 7cca9116a513451..be51b0af56ddbf7 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -110,6 +110,9 @@ Changes to the AArch64 Backend the required alignment space with a sequence 
of `0x0` bytes (the requested fill value) rather than NOPs. +* Assembler/disassembler support has been added for Armv9.6-A (2024) + architecture extensions. + Changes to the AMDGPU Backend ----------------------------- From 2ec5c69b6872b8b474f3d37b9125d3d57d144d1b Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Fri, 25 Oct 2024 09:42:01 -0700 Subject: [PATCH 13/39] Revert "[Sanitizers] Intercept timer_create" (#113710) Reverts llvm/llvm-project#112285 --- .../lib/hwasan/hwasan_platform_interceptors.h | 3 --- compiler-rt/lib/msan/tests/msan_test.cpp | 23 ------------------- .../sanitizer_common_interceptors.inc | 19 --------------- .../sanitizer_platform_interceptors.h | 3 --- .../sanitizer_platform_limits_posix.h | 4 ---- 5 files changed, 52 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h index e8011014c2331d7..d92b51052194275 100644 --- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h +++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h @@ -200,9 +200,6 @@ #undef SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID 0 -#undef SANITIZER_INTERCEPT_TIMER_CREATE -#define SANITIZER_INTERCEPT_TIMER_CREATE 0 - #undef SANITIZER_INTERCEPT_GETITIMER #define SANITIZER_INTERCEPT_GETITIMER 0 diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index ad265acf4c1e39a..41b99fabe84f478 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -4881,27 +4881,4 @@ TEST(MemorySanitizer, throw_catch) { // pass } } - -#if defined(__linux__) -TEST(MemorySanitizer, timer_create) { - timer_t timer; - EXPECT_POISONED(timer); - int res = timer_create(CLOCK_REALTIME, nullptr, &timer); - ASSERT_EQ(0, res); - EXPECT_NOT_POISONED(timer); - - // Make sure the timer is usable. 
- struct itimerspec cur_value {}; - cur_value.it_value.tv_sec = 1; - EXPECT_EQ(0, timer_settime(timer, 0, &cur_value, nullptr)); - - timer_t timer2; - EXPECT_POISONED(timer2); - // Use an invalid clock_id to make timer_create fail. - res = timer_create(INT_MAX, nullptr, &timer2); - ASSERT_EQ(-1, res); - EXPECT_POISONED(timer2); - timer_delete(timer); -} -#endif } // namespace diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 211f9f70d7e4c6c..b8627f8557afe29 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2289,24 +2289,6 @@ INTERCEPTOR(int, pthread_getcpuclockid, uptr thread, #define INIT_CLOCK_GETCPUCLOCKID #endif -#if SANITIZER_INTERCEPT_TIMER_CREATE -INTERCEPTOR(int, timer_create, __sanitizer_clockid_t clockid, void *sevp, - __sanitizer_timer_t *timer) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, timer_create, clockid, sevp, timer); - int res = REAL(timer_create)(clockid, sevp, timer); - if (!res && timer) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, timer, sizeof *timer); - } - return res; -} - -# define INIT_TIMER_CREATE \ - COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_create, "GLIBC_2.3.3"); -#else -# define INIT_TIMER_CREATE -#endif - #if SANITIZER_INTERCEPT_GETITIMER INTERCEPTOR(int, getitimer, int which, void *curr_value) { void *ctx; @@ -10284,7 +10266,6 @@ static void InitializeCommonInterceptors() { INIT_SETPWENT; INIT_CLOCK_GETTIME; INIT_CLOCK_GETCPUCLOCKID; - INIT_TIMER_CREATE; INIT_GETITIMER; INIT_TIME; INIT_GLOB; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 36fafdc642642bf..6959a6d52d604e0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -237,9 +237,6 @@ (SI_FREEBSD || SI_NETBSD || SI_LINUX || SI_SOLARIS) #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID \ (SI_LINUX || SI_FREEBSD || SI_NETBSD) -// TODO: This should be SI_POSIX, adding Linux first until I have time -// to verify all timer_t typedefs on other platforms. -#define SANITIZER_INTERCEPT_TIMER_CREATE SI_LINUX #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX #define SANITIZER_INTERCEPT_TIME SI_POSIX #define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index b4ccf7b3d7bef48..e8c81aa8e281637 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1517,10 +1517,6 @@ extern const int si_SEGV_ACCERR; #define SIGACTION_SYMNAME sigaction -# if SANITIZER_LINUX -typedef void *__sanitizer_timer_t; -# endif - #endif // SANITIZER_LINUX || SANITIZER_APPLE #endif From 9ea6fcd02b172ec12c9d4b9157d4a37765d83421 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 25 Oct 2024 16:47:08 +0000 Subject: [PATCH 14/39] [gn build] Port 1f2b7ae6d789 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 1630c8004d31575..0586704850a51b2 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -568,11 +568,11 @@ if (current_toolchain == default_toolchain) { "__locale_dir/locale_base_api/bsd_locale_fallbacks.h", "__locale_dir/locale_base_api/fuchsia.h", "__locale_dir/locale_base_api/ibm.h", - "__locale_dir/locale_base_api/locale_guard.h", "__locale_dir/locale_base_api/musl.h", "__locale_dir/locale_base_api/newlib.h", 
"__locale_dir/locale_base_api/openbsd.h", "__locale_dir/locale_base_api/win32.h", + "__locale_dir/locale_guard.h", "__math/abs.h", "__math/copysign.h", "__math/error_functions.h", From 305a1ceae371b482375545650ba9fd9e4c165157 Mon Sep 17 00:00:00 2001 From: Alexander Richardson Date: Fri, 25 Oct 2024 10:02:40 -0700 Subject: [PATCH 15/39] [DataLayout] Refactor storage of non-integral address spaces Instead of storing this as a separate array of non-integral pointers, add it to the PointerSpec class instead. This will allow for future simplifications such as splitting the non-integral property into multiple distinct ones: relocatable (i.e. non-stable representation) and non-integral representation (i.e. pointers with metadata). Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/105734 --- llvm/include/llvm/IR/DataLayout.h | 28 ++++++++++++++++----------- llvm/lib/IR/DataLayout.cpp | 32 ++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 8f7ab2f9df389ef..93bd519f5727d80 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -78,7 +78,11 @@ class DataLayout { Align ABIAlign; Align PrefAlign; uint32_t IndexBitWidth; - + /// Pointers in this address space don't have a well-defined bitwise + /// representation (e.g. may be relocated by a copying garbage collector). + /// Additionally, they may also be non-integral (i.e. containing additional + /// metadata such as bounds information/permissions). + bool IsNonIntegral; bool operator==(const PointerSpec &Other) const; }; @@ -133,10 +137,6 @@ class DataLayout { // The StructType -> StructLayout map. mutable void *LayoutMap = nullptr; - /// Pointers in these address spaces are non-integral, and don't have a - /// well-defined bitwise representation. 
- SmallVector NonIntegralAddressSpaces; - /// Sets or updates the specification for the given primitive type. void setPrimitiveSpec(char Specifier, uint32_t BitWidth, Align ABIAlign, Align PrefAlign); @@ -147,7 +147,8 @@ class DataLayout { /// Sets or updates the specification for pointer in the given address space. void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, - Align PrefAlign, uint32_t IndexBitWidth); + Align PrefAlign, uint32_t IndexBitWidth, + bool IsNonIntegral); /// Internal helper to get alignment for integer of given bitwidth. Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; @@ -165,7 +166,8 @@ class DataLayout { Error parsePointerSpec(StringRef Spec); /// Attempts to parse a single specification. - Error parseSpecification(StringRef Spec); + Error parseSpecification(StringRef Spec, + SmallVectorImpl &NonIntegralAddressSpaces); /// Attempts to parse a data layout string. Error parseLayoutString(StringRef LayoutString); @@ -337,13 +339,17 @@ class DataLayout { /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. 
- ArrayRef getNonIntegralAddressSpaces() const { - return NonIntegralAddressSpaces; + SmallVector getNonIntegralAddressSpaces() const { + SmallVector AddrSpaces; + for (const PointerSpec &PS : PointerSpecs) { + if (PS.IsNonIntegral) + AddrSpaces.push_back(PS.AddrSpace); + } + return AddrSpaces; } bool isNonIntegralAddressSpace(unsigned AddrSpace) const { - ArrayRef NonIntegralSpaces = getNonIntegralAddressSpaces(); - return is_contained(NonIntegralSpaces, AddrSpace); + return getPointerSpec(AddrSpace).IsNonIntegral; } bool isNonIntegralPointerType(PointerType *PT) const { diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d295d1f5785eb9d..a4af0ead07cf616 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -151,7 +151,8 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const { bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign && - IndexBitWidth == Other.IndexBitWidth; + IndexBitWidth == Other.IndexBitWidth && + IsNonIntegral == Other.IsNonIntegral; } namespace { @@ -206,7 +207,8 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = { // Default pointer type specifications. constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = { - {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64 + // p0:64:64:64:64 + {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false}, }; DataLayout::DataLayout() @@ -239,13 +241,11 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { PointerSpecs = Other.PointerSpecs; StructABIAlignment = Other.StructABIAlignment; StructPrefAlignment = Other.StructPrefAlignment; - NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces; return *this; } bool DataLayout::operator==(const DataLayout &Other) const { // NOTE: StringRepresentation might differ, it is not canonicalized. 
- // FIXME: NonIntegralAddressSpaces isn't compared. return BigEndian == Other.BigEndian && AllocaAddrSpace == Other.AllocaAddrSpace && ProgramAddrSpace == Other.ProgramAddrSpace && @@ -454,11 +454,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) { return createStringError( "index size cannot be larger than the pointer size"); - setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth); + setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth, + false); return Error::success(); } -Error DataLayout::parseSpecification(StringRef Spec) { +Error DataLayout::parseSpecification( + StringRef Spec, SmallVectorImpl &NonIntegralAddressSpaces) { // The "ni" specifier is the only two-character specifier. Handle it first. if (Spec.starts_with("ni")) { // ni:
[:
]... @@ -614,12 +616,23 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) { // Split the data layout string into specifications separated by '-' and // parse each specification individually, updating internal data structures. + SmallVector NonIntegralAddressSpaces; for (StringRef Spec : split(LayoutString, '-')) { if (Spec.empty()) return createStringError("empty specification is not allowed"); - if (Error Err = parseSpecification(Spec)) + if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces)) return Err; } + // Mark all address spaces that were qualified as non-integral now. This has + // to be done later since the non-integral property is not part of the data + // layout pointer specification. + for (unsigned AS : NonIntegralAddressSpaces) { + // If there is no special spec for a given AS, getPointerSpec(AS) returns + // the spec for AS0, and we then update that to mark it non-integral. + const PointerSpec &PS = getPointerSpec(AS); + setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth, + true); + } return Error::success(); } @@ -666,16 +679,17 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const { void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, Align PrefAlign, - uint32_t IndexBitWidth) { + uint32_t IndexBitWidth, bool IsNonIntegral) { auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace()); if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) { PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, - IndexBitWidth}); + IndexBitWidth, IsNonIntegral}); } else { I->BitWidth = BitWidth; I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; I->IndexBitWidth = IndexBitWidth; + I->IsNonIntegral = IsNonIntegral; } } From 9d88543301f262e584a36ea969237a2cf054328b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Fri, 25 Oct 2024 13:06:02 -0400 Subject: [PATCH 16/39] [AIX] Use internal lit shell for TableGen instead of a global setting (#113627) 
This is to address the latest lit regressions https://lab.llvm.org/buildbot/#/builders/64/builds/1285 caused by using the internal lit shell. This change will limit using the internal lit shell to TableGen on AIX so we do not hit these regressions. --- llvm/test/TableGen/lit.local.cfg | 8 ++++++++ llvm/utils/lit/lit/llvm/config.py | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/test/TableGen/lit.local.cfg b/llvm/test/TableGen/lit.local.cfg index 0e827479cd41235..9d6dfdc14bbfb06 100644 --- a/llvm/test/TableGen/lit.local.cfg +++ b/llvm/test/TableGen/lit.local.cfg @@ -1,2 +1,10 @@ +import platform +import lit.formats + config.suffixes = [".td"] config.excludes = ["Common", "Inputs"] + +# AIX 'diff' command doesn't support --strip-trailing-cr, but the internal +# python implementation does, so use that for cross platform compatibility +if platform.system() == "AIX": + config.test_format = lit.formats.ShTest() diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 1ef5796cd32e448..5f762ec7f3514ab 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -57,13 +57,6 @@ def __init__(self, lit_config, config): self.lit_config.note("using lit tools: {}".format(path)) lit_path_displayed = True - if platform.system() == "AIX": - # Diff on AIX doesn't have all the required features (see - # https://github.com/llvm/llvm-project/pull/108871 and - # https://github.com/llvm/llvm-project/pull/112997#issuecomment-2429656192) - # so always use the internal shell. - self.use_lit_shell = True - if platform.system() == "OS/390": self.with_environment("_BPXK_AUTOCVT", "ON") self.with_environment("_TAG_REDIR_IN", "TXT") From f24c1dd08ea71fa7334a85fd2772c2f728de0c56 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 25 Oct 2024 18:11:20 +0100 Subject: [PATCH 17/39] Fix MSVC "signed/unsigned mismatch" warning. NFC. 
--- clang/tools/clang-format/ClangFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 96fb85e99bf5f0f..5522d05744a2b4c 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -510,7 +510,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status); Replaces = Replaces.merge(FormatChanges); if (DryRun) { - return Replaces.size() > (IsJson ? 1 : 0) && + return Replaces.size() > (IsJson ? 1u : 0u) && emitReplacementWarnings(Replaces, AssumedFileName, Code); } if (OutputXML) { From e6917e95548f81e7f00b8bca70ce571780e2afc9 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Fri, 25 Oct 2024 21:15:21 +0400 Subject: [PATCH 18/39] =?UTF-8?q?[clang][NFC]=20Add=20test=20for=20CWG1898?= =?UTF-8?q?=20"Use=20of=20=E2=80=9Cequivalent=E2=80=9D=20in=20overload=20r?= =?UTF-8?q?esolution"=20(#113439)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [CWG1898](https://cplusplus.github.io/CWG/issues/1898.html) Use of “equivalent” in overload resolution ==================== [P1787R6](https://wg21.link/p1787r6): > CWG1898 is resolved by explicitly using the defined term parameter-type-list. Except that now it's called non-object-parameter-type-list, which is defined in [dcl.fct] [p8](https://eel.is/c++draft/dcl.fct#8) and [p4](https://eel.is/c++draft/dcl.fct#4). As for the wording, the first sentence [\_N4140\_.[over.dcl]/1](https://timsong-cpp.github.io/cppwp/n4140/over.dcl#1) where the word "equivalent" was used: > Two function declarations of the same name refer to the same function if they are in the same scope and have equivalent parameter declarations ([over.load]).
was replaced with what is now known as "corresponding overloads", defined in [[basic.scope.scope]/4](https://eel.is/c++draft/basic.scope#scope-4). The definition is present in P1787R6, but it's hard to reference, because the "corresponding overloads" term was coined later. --- clang/test/CXX/drs/cwg18xx.cpp | 83 ++++++++++++++++++++++++++++++++++ clang/www/cxx_dr_status.html | 2 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp index 7f0fb8cf589d48c..b059492637bd5cf 100644 --- a/clang/test/CXX/drs/cwg18xx.cpp +++ b/clang/test/CXX/drs/cwg18xx.cpp @@ -640,3 +640,86 @@ namespace H { struct S s; } } + +namespace cwg1898 { // cwg1898: 2.7 +void e(int) {} // #cwg1898-e +void e(int) {} +// expected-error@-1 {{redefinition of 'e'}} +// expected-note@#cwg1898-e {{previous definition is here}} + +void e2(int) {} +void e2(long) {} // OK, different type + +void f(int) {} // #cwg1898-f +void f(const int) {} +// expected-error@-1 {{redefinition of 'f'}} +// expected-note@#cwg1898-f {{previous definition is here}} + +void g(int) {} // #cwg1898-g +void g(volatile int) {} +// since-cxx20-warning@-1 {{volatile-qualified parameter type 'volatile int' is deprecated}} +// expected-error@-2 {{redefinition of 'g'}} +// expected-note@#cwg1898-g {{previous definition is here}} + +void h(int *) {} // #cwg1898-h +void h(int[]) {} +// expected-error@-1 {{redefinition of 'h'}} +// expected-note@#cwg1898-h {{previous definition is here}} + +void h2(int *) {} // #cwg1898-h2 +void h2(int[2]) {} +// expected-error@-1 {{redefinition of 'h2'}} +// expected-note@#cwg1898-h2 {{previous definition is here}} + +void h3(int (*)[2]) {} // #cwg1898-h3 +void h3(int [3][2]) {} +// expected-error@-1 {{redefinition of 'h3'}} +// expected-note@#cwg1898-h3 {{previous definition is here}} + +void h4(int (*)[2]) {} +void h4(int [3][3]) {} // OK, differ in non-top-level extent of array + +void i(int *) {} +void i(const int *) {} // OK, 
pointee cv-qualification is not discarded + +void i2(int *) {} // #cwg1898-i2 +void i2(int * const) {} +// expected-error@-1 {{redefinition of 'i2'}} +// expected-note@#cwg1898-i2 {{previous definition is here}} + +void j(void(*)()) {} // #cwg1898-j +void j(void()) {} +// expected-error@-1 {{redefinition of 'j'}} +// expected-note@#cwg1898-j {{previous definition is here}} + +void j2(void(int)) {} // #cwg1898-j2 +void j2(void(const int)) {} +// expected-error@-1 {{redefinition of 'j2'}} +// expected-note@#cwg1898-j2 {{previous definition is here}} + +struct A { + void k(int) {} // #cwg1898-k + void k(int) {} + // expected-error@-1 {{class member cannot be redeclared}} + // expected-note@#cwg1898-k {{previous definition is here}} +}; + +struct B : A { + void k(int) {} // OK, shadows A::k +}; + +void l() {} +void l(...) {} + +#if __cplusplus >= 201103L +template +void m(T) {} +template +void m(Ts...) {} + +template +void m2(T, U) {} +template +void m2(Ts..., U) {} +#endif +} // namespace cwg1898 diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 82ba9b370ba5953..6640ed477a241e5 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -11219,7 +11219,7 @@

C++ defect report implementation status

1898 CD6 Use of “equivalent” in overload resolution - Unknown + Clang 2.7 1899 From d3c29e8d2f11742e83e2b80df47391598bf2e857 Mon Sep 17 00:00:00 2001 From: Yijia Gu Date: Fri, 25 Oct 2024 10:24:31 -0700 Subject: [PATCH 19/39] [mlir][test][bazel] add missing deps for TestPass --- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 34beb758a12dd44..c69f793943beeca 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -547,8 +547,13 @@ cc_library( ":TestDialect", "//llvm:Support", "//mlir:FuncDialect", + "//mlir:GPUToSPIRV", + "//mlir:GPUTransforms", "//mlir:IR", "//mlir:Pass", + "//mlir:SPIRVDialect", + "//mlir:SPIRVToLLVM", + "//mlir:SPIRVTransforms", "//mlir:Support", ], ) From 6e7375031a1a3172d5e369cf2c108da2bcf65c8a Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Fri, 25 Oct 2024 10:27:42 -0700 Subject: [PATCH 20/39] [clang-linker-wrapper] Add error handling for missing linker path (#113613) In clang-linker-wrapper, we do not explicitly check if --linker-path is provided. This PR adds a check to capture this. 
Thanks --------- Signed-off-by: Arvind Sudarsanam --- clang/test/Driver/linker-wrapper.c | 4 ++++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 068ea2d7d3c663c..470af4d5d70cac7 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -250,3 +250,7 @@ __attribute__((visibility("protected"), used)) int x; // MLLVM-SAME: -Xlinker -mllvm=-pass-remarks=foo,bar // OFFLOAD-OPT-NOT: -Xlinker -mllvm=-pass-remarks=foo,bar // OFFLOAD-OPT-SAME: {{$}} + +// Error handling when --linker-path is not provided for clang-linker-wrapper +// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s +// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path' diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 9fea1fdcd5fb466..9fcecaee318a79f 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -370,6 +370,8 @@ Error runLinker(ArrayRef Files, const ArgList &Args) { // Render the linker arguments and add the newly created image. We add it // after the output file to ensure it is linked with the correct libraries. StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ); + if (LinkerPath.empty()) + return createStringError("linker path missing, must pass 'linker-path'"); ArgStringList NewLinkerArgs; for (const opt::Arg *Arg : Args) { // Do not forward arguments only intended for the linker wrapper. 
From ac4bd74190fedfbe025ef757ff308dd184a507f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Fri, 25 Oct 2024 10:39:26 -0700 Subject: [PATCH 21/39] [mlir] Add apply_patterns.linalg.pad_vectorization TD Op (#112504) This PR simply wraps `populatePadOpVectorizationPatterns` into a new Transform Dialect Op: `apply_patterns.linalg.pad_vectorization`. This change makes it possible to run (and test) the corresponding patterns _without_: `transform.structured.vectorize_children_and_apply_patterns`. Note that the Op above only supports non-masked vectorisation (i.e. when the inputs are static), so, effectively, only fixed-width vectorisation (as opposed to scalable vectorisation). As such, this change is required to construct vectorization pipelines for tensor.pad targeting scalable vectors. To test the new Op and the corresponding patterns, I added "vectorization-pad-patterns.mlir" - most tests have been extracted from "vectorization-with-patterns.mlir". --- .../Linalg/TransformOps/LinalgTransformOps.td | 20 ++ .../TransformOps/LinalgTransformOps.cpp | 5 + .../Linalg/Transforms/Vectorization.cpp | 3 + .../Linalg/vectorization-pad-patterns.mlir | 274 ++++++++++++++++++ .../Linalg/vectorization-with-patterns.mlir | 143 --------- 5 files changed, 302 insertions(+), 143 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 040c04b0410ecf5..abf446887c54425 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -84,6 +84,26 @@ def ApplyFoldAddIntoDestPatternsOp : Op]> { + let description = [{ + Apply patterns that vectorize tensor.pad. + + These patterns rewrite tensor.pad Ops using vector.transfer_read and + vector.transfer_write operations. 
This is done either by: + 1. Folding tensor.pad with an existing vector.transfer_read / + vector.transfer_write Op (generated prior to running these patterns). + 2. Rewriting it (when matched together with a tensor.insert_slice + consumer Op) as a vector.transfer_read + vector.transfer_write pair. + + In both cases, these patterns look at producers and consumers for the + matched tensor.pad Op to find opportunities for vectorization. + }]; + + let assemblyFormat = "attr-dict"; +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 1f1d8ad89ae2b9b..3d3f0a93a3829bf 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -253,6 +253,11 @@ void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns( linalg::populateFoldAddIntoDestPatterns(patterns); } +void transform::ApplyPadVectorizationPatternsOp::populatePatterns( + RewritePatternSet &patterns) { + linalg::populatePadOpVectorizationPatterns(patterns); +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index e1b97fbf985df81..0a2457176a1d474 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2712,6 +2712,9 @@ struct PadOpVectorizationWithInsertSlicePattern void mlir::linalg::populatePadOpVectorizationPatterns( RewritePatternSet &patterns, PatternBenefit baseBenefit) { + // TODO: The following pattern implements
"decomposition" and + // optional "vectorization". Separate "decomposition" into a separate + // pre-processing pattern group. patterns.add(patterns.getContext(), baseBenefit); // Try these specialized patterns first before resorting to the generic one. diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir new file mode 100644 index 000000000000000..2aa4638af3f0f3b --- /dev/null +++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir @@ -0,0 +1,274 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithTransferReadPattern] +///---------------------------------------------------------------------------------------- +// CHECK-LABEL: func @pad_and_transfer_read +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 +// CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %c6 = arith.constant 6.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = vector.transfer_read %0[%c0, %c0], %c6 + : tensor<10x13xf32>, vector<7x9xf32> + return %1 : vector<7x9xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { +
transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithTransferWritePattern] +///---------------------------------------------------------------------------------------- +func.func private @make_vector() -> vector<7x9xf32> + +// CHECK-LABEL: func @pad_and_transfer_write_static_low_and_high +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_write_static_low_and_high( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- + +func.func private @make_vector() -> vector<7x9xf32> + +// CHECK-LABEL: func @pad_and_transfer_write_static_low_dynamic_high +// CHECK-SAME: 
%[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index +// CHECK-NOT: tensor.pad +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_write_static_low_dynamic_high( + %arg0: tensor, %size: index, %padding: index) -> tensor { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] + : tensor to tensor + %0 = tensor.pad %s low[0, 0] high[%padding, 7] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor to tensor + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor + %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor to tensor + return %3 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + + +// ----- + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithInsertSlicePattern] +///---------------------------------------------------------------------------------------- + +func.func private @make_vector() -> tensor<12x13xf32> + +// CHECK-LABEL: func @pad_and_insert_slice_source +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 
+// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32> +// CHECK: return %[[WRITE]] +func.func @pad_and_insert_slice_source( + %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<7x9xf32> + %1 = call @make_vector() : () -> tensor<12x13xf32> + %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> + return %r : tensor<12x13xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + + +// ----- + +///---------------------------------------------------------------------------------------- +/// tensor::PadOp -> tensor::EmptyOp + linalg::FillOp/tensor::GenerateOp + tensor::InsertSliceOp +/// [Pattern: GenericPadOpVectorizationPattern] +///---------------------------------------------------------------------------------------- + +func.func private @make_vector() -> tensor<12x13xf32> + +// Same as @pad_and_insert_slice_dest in vectorization-with-patterns.mlir, but +// over here linalg::fill is not vectorized (patterns for linalg.fill are not +// included here) +// CHECK-LABEL: func.func @pad_and_insert_slice_dest( +// CHECK-SAME: 
%[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 5.000000e+00 : f32 +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[PAD]] : f32) outs(%[[EMPTY]] : tensor<1x12x13xf32>) -> tensor<1x12x13xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> +// CHECK: %[[VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32> +// CHECK: %[[RES:.*]] = tensor.insert_slice %[[VEC]] into %[[WRITE]][0, 0, 0] [1, 12, 13] [1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> +// CHECK: return %[[RES]] : tensor<1x12x13xf32> + +func.func @pad_and_insert_slice_dest( + %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %c5 : f32 + } : tensor<1x5x6xf32> to tensor<1x12x13xf32> + %1 = call @make_vector() : () -> tensor<12x13xf32> + %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> + return %r : tensor<1x12x13xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- +func.func private @make_vector() -> vector<7x9xf32> + +// 
Variant of @pad_and_transfer_write_static + +// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_low_pad +// CHECK-NOT: tensor.pad +// CHECK: linalg.fill +func.func @pad_and_transfer_write_static_non_zero_low_pad( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 1] high[5, 6] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- +func.func private @make_vector() -> vector<7x9xf32> + +// Variant of @pad_and_transfer_write_static + +// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_offset +// CHECK-NOT: tensor.pad +// CHECK: linalg.fill +func.func @pad_and_transfer_write_static_non_zero_offset( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 1] high[5, 6] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 1] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module 
attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 1c6a786bfa436d9..189507d97d6dc2f 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -935,149 +935,6 @@ module attributes {transform.with_named_sequence} { } } -// ----- - -// CHECK-LABEL: func @pad_and_transfer_read -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 -// CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %c6 = arith.constant 6.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { - ^bb0(%arg1: index, %arg2: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<10x13xf32> - %1 = vector.transfer_read %0[%c0, %c0], %c6 - : tensor<10x13xf32>, vector<7x9xf32> - return %1 : vector<7x9xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = 
transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -func.func private @make_vector() -> vector<7x9xf32> - -// CHECK-LABEL: func @pad_and_transfer_write_static -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_write_static( - %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<10x13xf32> - %1 = call @make_vector() : () -> vector<7x9xf32> - %2 = vector.transfer_write %1, %0[%c0, %c0] - : vector<7x9xf32>, tensor<10x13xf32> - %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> - return %3 : tensor<5x6xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - -// ----- - -func.func private @make_vector() -> vector<7x9xf32> - -// CHECK-LABEL: func @pad_and_transfer_write_dynamic_static -// CHECK-SAME: %[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index -// CHECK-NOT: tensor.pad -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: 
%[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_write_dynamic_static( - %arg0: tensor, %size: index, %padding: index) -> tensor { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] - : tensor to tensor - %0 = tensor.pad %s low[0, 0] high[%padding, 7] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor to tensor - %1 = call @make_vector() : () -> vector<7x9xf32> - %2 = vector.transfer_write %1, %0[%c0, %c0] - : vector<7x9xf32>, tensor - %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor to tensor - return %3 : tensor -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - -// ----- - -func.func private @make_vector() -> tensor<12x13xf32> - -// CHECK-LABEL: func @pad_and_insert_slice_source -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], 
%[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32> -// CHECK: return %[[WRITE]] -func.func @pad_and_insert_slice_source( - %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<7x9xf32> - %1 = call @make_vector() : () -> tensor<12x13xf32> - %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> - return %r : tensor<12x13xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - // ----- func.func private @make_vector() -> tensor<12x13xf32> From 14db06946839729befd6bd3ced8142547f5fd139 Mon Sep 17 00:00:00 2001 From: ssijaric-nv Date: Fri, 25 Oct 2024 10:47:39 -0700 Subject: [PATCH 22/39] [InstCombine] Fix a cycle when folding fneg(select) with scalable vector types (#112465) The two folding operations are causing a cycle for the following case with scalable vector types: define @test_fneg_select_abs( %cond, %b) { %1 = select %cond, zeroinitializer, %b %2 = fneg fast %1 ret %2 } 1) fold fneg: -(Cond ? C : Y) -> Cond ? -C : -Y 2) fold select: (Cond ? -X : -Y) -> -(Cond ? X : Y) 1) results in the following since ' zeroinitializer' passes the check for the immediate constant: %.neg = fneg fast zeroinitializer %b.neg = fneg fast %b %1 = select fast %cond, %.neg, %b.neg and so we end up going back and forth between 1) and 2). 
Attempt to fold scalable vector constants, so that we end up with a splat instead: define @test_fneg_select_abs( %cond, %b) { %b.neg = fneg fast %b %1 = select fast %cond, shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), %b.neg ret %1 } --- llvm/lib/IR/ConstantFold.cpp | 29 ++++++++++--------- llvm/test/Transforms/InstCombine/fneg.ll | 32 +++++++++++++++++++++ llvm/test/Transforms/InstSimplify/fp-nan.ll | 6 ++-- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 57d9a03c9c22b83..07dfbc41e79b005 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -581,26 +581,27 @@ Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) { case Instruction::FNeg: return ConstantFP::get(C->getContext(), neg(CV)); } - } else if (auto *VTy = dyn_cast(C->getType())) { - - Type *Ty = IntegerType::get(VTy->getContext(), 32); + } else if (auto *VTy = dyn_cast(C->getType())) { // Fast path for splatted constants. if (Constant *Splat = C->getSplatValue()) if (Constant *Elt = ConstantFoldUnaryInstruction(Opcode, Splat)) return ConstantVector::getSplat(VTy->getElementCount(), Elt); - // Fold each element and create a vector constant from those constants. - SmallVector Result; - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *ExtractIdx = ConstantInt::get(Ty, i); - Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx); - Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt); - if (!Res) - return nullptr; - Result.push_back(Res); - } + if (auto *FVTy = dyn_cast(VTy)) { + // Fold each element and create a vector constant from those constants. 
+ Type *Ty = IntegerType::get(FVTy->getContext(), 32); + SmallVector Result; + for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) { + Constant *ExtractIdx = ConstantInt::get(Ty, i); + Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx); + Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt); + if (!Res) + return nullptr; + Result.push_back(Res); + } - return ConstantVector::get(Result); + return ConstantVector::get(Result); + } } // We don't know how to fold this. diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll index 3c4088832feaaa6..6a9b3309bb347ec 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -1109,4 +1109,36 @@ define float @test_fneg_select_maxnum(float %x) { ret float %neg } +; Check that there's no infinite loop. +define @test_fneg_select_svec( %cond, %b) { +; CHECK-LABEL: @test_fneg_select_svec( +; CHECK-NEXT: [[TMP2:%.*]] = fneg fast [[TMP1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = select fast [[COND:%.*]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), [[TMP2]] +; CHECK-NEXT: ret [[TMP3]] +; + %1 = select %cond, zeroinitializer, %b + %2 = fneg fast %1 + ret %2 +} + +define @test_fneg_select_svec_2( %cond, %a) { +; CHECK-LABEL: @test_fneg_select_svec_2( +; CHECK-NEXT: [[A_NEG:%.*]] = fneg fast [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select fast [[COND:%.*]], [[A_NEG]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = select %cond, %a, zeroinitializer + %2 = fneg fast %1 + ret %2 +} + +define @test_fneg_select_svec_3( %cond, %b) { +; CHECK-LABEL: @test_fneg_select_svec_3( +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) +; + %1 = select %cond, zeroinitializer, zeroinitializer + %2 = fneg fast %1 + ret %2 +} + !0 = !{} diff --git 
a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index bb557500822c143..06b23200bafff81 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -237,8 +237,7 @@ define <2 x double> @unary_fneg_nan_2(<2 x double> %x) { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define @unary_fneg_nan_2_scalable_vec_0() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_0( -; CHECK-NEXT: [[R:%.*]] = fneg shufflevector ( insertelement ( poison, double 0xFFF1234567890ABC, i64 0), poison, zeroinitializer) -; CHECK-NEXT: ret [[R]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double 0x7FF1234567890ABC, i64 0), poison, zeroinitializer) ; %r = fneg splat (double 0xFFF1234567890ABC) ret %r @@ -247,8 +246,7 @@ define @unary_fneg_nan_2_scalable_vec_0() { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define @unary_fneg_nan_2_scalable_vec_1() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_1( -; CHECK-NEXT: [[R:%.*]] = fneg shufflevector ( insertelement ( poison, double 0x7FF0000000000001, i64 0), poison, zeroinitializer) -; CHECK-NEXT: ret [[R]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double 0xFFF0000000000001, i64 0), poison, zeroinitializer) ; %r = fneg splat (double 0x7FF0000000000001) ret %r From 843c2fbe7f983c2a2059f753e4494f06fb645a9e Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Fri, 25 Oct 2024 18:57:01 +0100 Subject: [PATCH 23/39] Add parser+semantics support for scope construct (#113700) Test parsing, semantics and a couple of basic semantic checks for block/worksharing constructs. Add TODO message in lowering. 
--- .../flang/Semantics/openmp-directive-sets.h | 2 ++ flang/lib/Lower/OpenMP/OpenMP.cpp | 12 ++++++++++ flang/lib/Parser/openmp-parsers.cpp | 1 + flang/lib/Parser/unparse.cpp | 3 +++ flang/lib/Semantics/check-omp-structure.cpp | 7 +++++- flang/lib/Semantics/resolve-directives.cpp | 2 ++ flang/test/Lower/OpenMP/Todo/scope.f90 | 13 ++++++++++ flang/test/Parser/OpenMP/scope.f90 | 24 +++++++++++++++++++ .../test/Semantics/OpenMP/invalid-branch.f90 | 8 +++++++ flang/test/Semantics/OpenMP/nested01.f90 | 7 ++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 10 +++++++- 11 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 flang/test/Lower/OpenMP/Todo/scope.f90 create mode 100644 flang/test/Parser/OpenMP/scope.f90 diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h index 8eb736bb098fe4e..50d6d5b59ef7dd9 100644 --- a/flang/include/flang/Semantics/openmp-directive-sets.h +++ b/flang/include/flang/Semantics/openmp-directive-sets.h @@ -211,6 +211,7 @@ static const OmpDirectiveSet blockConstructSet{ Directive::OMPD_parallel, Directive::OMPD_parallel_masked, Directive::OMPD_parallel_workshare, + Directive::OMPD_scope, Directive::OMPD_single, Directive::OMPD_target, Directive::OMPD_target_data, @@ -281,6 +282,7 @@ static const OmpDirectiveSet workShareSet{ Directive::OMPD_workshare, Directive::OMPD_parallel_workshare, Directive::OMPD_parallel_sections, + Directive::OMPD_scope, Directive::OMPD_sections, Directive::OMPD_single, } | allDoSet, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fc54da8babe63e9..01a40d6e2204ef2 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1650,6 +1650,15 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return sectionsOp; } +static void genScopeOp(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + 
lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + TODO(loc, "Scope construct"); +} + static mlir::omp::SingleOp genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2478,6 +2487,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_simd: genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_scope: + genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item); + break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 59a8757e58e8cc4..e740c421ca80276 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -697,6 +697,7 @@ TYPE_PARSER(construct(first( "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked), "PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare), "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel), + "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope), "SINGLE" >> pure(llvm::omp::Directive::OMPD_single), "TARGET DATA" >> pure(llvm::omp::Directive::OMPD_target_data), "TARGET PARALLEL" >> pure(llvm::omp::Directive::OMPD_target_parallel), diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 04df988223e8f8d..19ceb2a3ebc3178 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2386,6 +2386,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel: Word("PARALLEL "); break; + case llvm::omp::Directive::OMPD_scope: + Word("SCOPE "); + break; case llvm::omp::Directive::OMPD_single: Word("SINGLE "); break; diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp index 46486907ceb9e1f..1c2cf304d0ee95f 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -972,6 +972,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { HasInvalidWorksharingNesting( beginDir.source, llvm::omp::nestedWorkshareErrSet); break; + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: // TODO: This check needs to be extended while implementing nesting of // regions checks. @@ -1864,6 +1865,9 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { const auto &dir{std::get(x.t)}; ResetPartialContext(dir.source); switch (dir.v) { + case llvm::omp::Directive::OMPD_scope: + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_end_scope); + break; // 2.7.3 end-single-clause -> copyprivate-clause | // nowait-clause case llvm::omp::Directive::OMPD_single: @@ -1886,7 +1890,8 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { // end_workshareare popped as they are pushed while entering the // EndBlockDirective. 
void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) { - if ((GetContext().directive == llvm::omp::Directive::OMPD_end_single) || + if ((GetContext().directive == llvm::omp::Directive::OMPD_end_scope) || + (GetContext().directive == llvm::omp::Directive::OMPD_end_single) || (GetContext().directive == llvm::omp::Directive::OMPD_end_workshare)) { dirContext_.pop_back(); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 33936ba4c2b34f1..513e42bee976a9a 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1526,6 +1526,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_ordered: case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_target_data: @@ -1557,6 +1558,7 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_parallel_masked: case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_task: diff --git a/flang/test/Lower/OpenMP/Todo/scope.f90 b/flang/test/Lower/OpenMP/Todo/scope.f90 new file mode 100644 index 000000000000000..16a067dc8f256be --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/scope.f90 @@ -0,0 +1,13 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s + +! 
CHECK: not yet implemented: Scope construct +program omp_scope + integer i + i = 10 + + !$omp scope private(i) + print *, "omp scope", i + !$omp end scope + +end program omp_scope diff --git a/flang/test/Parser/OpenMP/scope.f90 b/flang/test/Parser/OpenMP/scope.f90 new file mode 100644 index 000000000000000..6574136311e7187 --- /dev/null +++ b/flang/test/Parser/OpenMP/scope.f90 @@ -0,0 +1,24 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +program omp_scope + integer i + i = 10 + +!CHECK: !$OMP SCOPE PRIVATE(i) +!CHECK: !$OMP END SCOPE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct +!PARSE-TREE: OmpBeginBlockDirective +!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpClauseList -> OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i' +!PARSE-TREE: Block +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> PrintStmt +!PARSE-TREE: OmpEndBlockDirective +!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpClauseList -> OmpClause -> Nowait + + !$omp scope private(i) + print *, "omp scope", i + !$omp end scope nowait +end program omp_scope diff --git a/flang/test/Semantics/OpenMP/invalid-branch.f90 b/flang/test/Semantics/OpenMP/invalid-branch.f90 index ed9e4d268f65a8c..28aab8b122f3f2c 100644 --- a/flang/test/Semantics/OpenMP/invalid-branch.f90 +++ b/flang/test/Semantics/OpenMP/invalid-branch.f90 @@ -105,4 +105,12 @@ program omp_invalid_branch !$omp end parallel 9 print *, "2nd alternate return" + !CHECK: invalid branch into an OpenMP structured block + goto 100 + !$omp scope + 100 continue + !CHECK: invalid branch leaving an OpenMP structured block + goto 200 + !$omp end scope + 200 continue end program diff --git 
a/flang/test/Semantics/OpenMP/nested01.f90 b/flang/test/Semantics/OpenMP/nested01.f90 index 49c964ab86aa6bd..0936e4c1b45a5db 100644 --- a/flang/test/Semantics/OpenMP/nested01.f90 +++ b/flang/test/Semantics/OpenMP/nested01.f90 @@ -25,6 +25,13 @@ !$omp end target enddo + !$omp do + do i = 1, N + !ERROR: A worksharing region may not be closely nested inside a worksharing, explicit task, taskloop, critical, ordered, atomic, or master region + !$omp scope + !$omp end scope + end do + !$omp end do !$omp do do i = 1, N diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 1834ad4d037f3d9..d592f369a17f92c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -892,7 +892,7 @@ def OMP_Scan : Directive<"scan"> { let association = AS_Separating; let category = CA_Subsidiary; } -def OMP_scope : Directive<"scope"> { +def OMP_Scope : Directive<"scope"> { let allowedClauses = [ VersionedClause, VersionedClause, @@ -905,6 +905,14 @@ def OMP_scope : Directive<"scope"> { let association = AS_Block; let category = CA_Executable; } +def OMP_EndScope : Directive<"end scope"> { + let allowedOnceClauses = [ + VersionedClause, + ]; + let leafConstructs = OMP_Scope.leafConstructs; + let association = OMP_Scope.association; + let category = OMP_Scope.category; +} def OMP_Section : Directive<"section"> { let association = AS_Separating; let category = CA_Subsidiary; From 144ddca9ed6a439ad8a421c3ff2ea763532341ba Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 25 Oct 2024 11:09:57 -0700 Subject: [PATCH 24/39] [MemProf] Avoid duplicate edges between nodes (#113337) The recent change to add support for cloning indirect calls inadvertently caused duplicate edges to be created between the same caller/callee pair.
This is due to the new moveCalleeEdgeToNewCaller not properly guarding the addition of a new edge (ironically I was testing for that in an assertion, but failed to handle that case specially otherwise). Now simply move the context ids over to any existing edge. This issue in turn led to some assumptions in cloning being violated, resulting in a later crash. Add a test for this case to checkNode. --- .../IPO/MemProfContextDisambiguation.cpp | 21 +++++++++++- llvm/test/ThinLTO/X86/memprof-icp.ll | 34 +++++++++++++++---- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 4efd683dfca3633..905186edcbecc40 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1352,6 +1352,17 @@ static void checkNode(const ContextNode *Node, } assert(NodeContextIds == CalleeEdgeContextIds); } + // FIXME: Since this checking is only invoked under an option, we should + // change the error checking from using assert to something that will trigger + // an error on a release build. +#ifndef NDEBUG + // Make sure we don't end up with duplicate edges between the same caller and + // callee. + DenseSet *> NodeSet; + for (const auto &E : Node->CalleeEdges) + NodeSet.insert(E->Callee); + assert(NodeSet.size() == Node->CalleeEdges.size()); +#endif } template @@ -3125,7 +3136,15 @@ void CallsiteContextGraph:: // from the same callers as the old node. That should be true in the current // use case, where we will remove None-type edges after copying over all // caller edges from the callee. 
- assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller)); + auto *ExistingCallerEdge = + NewCaller->findEdgeFromCaller(OldCallerEdge->Caller); + assert(IsNewNode || ExistingCallerEdge); + if (ExistingCallerEdge) { + ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), + EdgeContextIdsToMove.end()); + ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); + continue; + } auto NewEdge = std::make_shared( NewCaller, OldCallerEdge->Caller, computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll index f17e19e1f77ef25..99e071898765567 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp.ll @@ -186,9 +186,13 @@ ; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1 ;; In each version of foo we should have promoted the indirect call to two conditional ;; direct calls, one to B::bar and one to B0::bar. 
The cloned version of foo should call @@ -208,10 +212,10 @@ ; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold -; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis ; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend @@ -247,8 +251,8 @@ ; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" ; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO 
backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend ;--- foo.ll @@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32) define i32 @_ZN2B03barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -311,6 +318,9 @@ declare void @_ZdlPvm() define i32 @_ZN1B3barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) } !42 = !{!43, !"cold"} !43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154} !44 = !{i64 4457553070050523782} +!45 = !{!46, !48} +!46 = !{!47, !"notcold"} +!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434} +!48 = !{!49, !"cold"} +!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557} +!50 = !{i64 456} +!51 = !{!52, !54} +!52 = !{!53, !"notcold"} +!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298} +!54 = !{!55, !"cold"} +!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154} +!56 = !{i64 789} From f4db221258cb44a8f9804ce852c0403328de39b2 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Fri, 25 Oct 2024 11:12:41 -0700 Subject: [PATCH 25/39] [libc++][test] Use `ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings)` for `-Wno-psabi` (#113608) MSVC doesn't understand `-Wno-psabi`, which was introduced here by @ldionne in #106077. Using `ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings)` (implemented by #75317) avoids passing this to MSVC. --- .../std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp index 5130758d5efd52d..abb12d6a3c24730 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp @@ -18,7 +18,7 @@ // Ignore diagnostic about vector types changing the ABI on some targets, since // that is irrelevant for this test. -// ADDITIONAL_COMPILE_FLAGS: -Wno-psabi +// ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-psabi #include #include From a0c318938a528cfbef509a2516b36dd2411a52b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <41161573+gbossu@users.noreply.github.com> Date: Fri, 25 Oct 2024 20:19:22 +0200 Subject: [PATCH 26/39] [CodeGen][NFC] Properly split MachineLICM and EarlyMachineLICM (#113573) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both are based on MachineLICMBase, and the functionality there is "switched" based on a PreRegAlloc flag. This commit is simply about trusting the original value of that flag, defined by the `MachineLICM` and `EarlyMachineLICM` classes. The `PreRegAlloc` flag used to be overwritten based on MRI.isSSA(), which is unreliable due to how it is inferred by the MIRParser. I see that we can now define isSSA in MIR (thanks @gargaroff ), meaning the fix isn’t really needed anymore, but redefining that flag still feels wrong.
Note that I'm looking into upstreaming more changes to MachineLICM, see [the discourse thread](https://discourse.llvm.org/t/extending-post-regalloc-machinelicm/82725). --- llvm/lib/CodeGen/MachineLICM.cpp | 6 ------ llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir | 9 +-------- llvm/test/CodeGen/AMDGPU/licm-regpressure.mir | 4 ++-- llvm/test/CodeGen/AMDGPU/licm-valu.mir | 4 ++-- llvm/test/CodeGen/X86/unfoldMemoryOperand.mir | 2 +- llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir | 4 ++-- 6 files changed, 8 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 793ad75759ccb86..7ea07862b839d02 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -391,12 +391,6 @@ bool MachineLICMImpl::run(MachineFunction &MF) { MRI = &MF.getRegInfo(); SchedModel.init(&ST); - // FIXME: Remove this assignment or convert to an assert? (dead variable PreRegAlloc) - // MachineLICM and PostRAMachineLICM were distinguished by introducing - // EarlyMachineLICM and MachineLICM respectively to avoid "using an unreliable - // MRI::isSSA() check to determine whether register allocation has happened" - // (See 4a7c8e7). - PreRegAlloc = MRI->isSSA(); HasProfileData = MF.getFunction().hasProfileData(); if (PreRegAlloc) diff --git a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir index 406025c4fde3022..90ff68d30a3a0e5 100644 --- a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir +++ b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir @@ -3,9 +3,6 @@ --- name: test tracksRegLiveness: true -isSSA: false -registers: - - { id: 0, class: gpr64 } stack: - { id: 0, size: 8, type: spill-slot } body: | @@ -30,14 +27,11 @@ body: | bb.2: liveins: $x0 - %0 = COPY $x0 ... 
+ --- name: test2 tracksRegLiveness: true -isSSA: false -registers: - - { id: 0, class: gpr64 } stack: - { id: 0, size: 8, type: spill-slot } body: | @@ -62,5 +56,4 @@ body: | bb.2: liveins: $x0 - %0 = COPY $x0 ... diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir index e63009fdcb43cf2..dd478f94e1039ec 100644 --- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s # MachineLICM shall limit hoisting of V_CVT instructions out of the loop keeping # register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs. 
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir index b4f5e057f532b51..6a28eee19d503cf 100644 --- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s --- name: hoist_move diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir index ff3d9ca378dbd52..135b14d6836a090 100644 --- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir +++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-- -passes machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-- -passes early-machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s --- | @x = dso_local global i32 0, align 4 @z = dso_local local_unnamed_addr global [1024 x i32] zeroinitializer, align 16 diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir index d4d59e14724ebe7..b65a0e71af1dd2d 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir @@ -1,6 +1,6 @@ --- | - ; RUN: llc -run-pass=machinelicm -o - %s | 
FileCheck %s - ; RUN: llc -passes=machinelicm -o - %s | FileCheck %s + ; RUN: llc -run-pass=early-machinelicm -o - %s | FileCheck %s + ; RUN: llc -passes=early-machinelicm -o - %s | FileCheck %s ; Line numbers should not be retained when loop invariant instructions are hoisted. ; Doing so causes poor stepping bevavior. ; From eccdb2489483ca58d2cb35bc38967a8e33117575 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 25 Oct 2024 13:19:58 -0500 Subject: [PATCH 27/39] [OpenMP] Create versioned libgomp softlinks (#112973) Add libgomp.1.dylib for MacOS and libgomp.so.1 for Linux Linkers on Mac and Linux pick up versioned libgomp dynamic library files. The existing softlinks (libgomp.dylib for MacOS and libgomp.so for Linux) are insufficient. This helps alleviate the issue of mixing libgomp and libomp at runtime. --- openmp/runtime/src/CMakeLists.txt | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 439cc20963a1298..61c0bacc9f20629 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -253,6 +253,17 @@ if(NOT WIN32) libiomp5${LIBOMP_LIBRARY_SUFFIX} WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR} ) + if(LIBOMP_ENABLE_SHARED) + if(APPLE) + set(VERSIONED_LIBGOMP_NAME libgomp.1${LIBOMP_LIBRARY_SUFFIX}) + else() + set(VERSIONED_LIBGOMP_NAME libgomp${LIBOMP_LIBRARY_SUFFIX}.1) + endif() + add_custom_command(TARGET omp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink ${LIBOMP_LIB_FILE} ${VERSIONED_LIBGOMP_NAME} + WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR} + ) + endif() endif() # Definitions for testing, for reuse when testing libomptarget-nvptx. 
@@ -439,13 +450,18 @@ else() if(${LIBOMP_INSTALL_ALIASES}) # Create aliases (symlinks) of the library for backwards compatibility + extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}") set(LIBOMP_ALIASES "libgomp;libiomp5") foreach(alias IN LISTS LIBOMP_ALIASES) - extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}") install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\" \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\$ENV{DESTDIR}${outdir}\")") endforeach() + if(LIBOMP_ENABLE_SHARED) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\" + \"${VERSIONED_LIBGOMP_NAME}\" WORKING_DIRECTORY + \"\$ENV{DESTDIR}${outdir}\")") + endif() endif() endif() install( From 88cc7ac0cc43a739c25f6988c1bfe3949ca4da62 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 14:21:43 -0400 Subject: [PATCH 28/39] [libc++][NFC] Remove unused functions from posix_l_fallbacks (#113709) --- .../__support/xlocale/__posix_l_fallback.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/libcxx/include/__support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h index 8a3a6f27f48dde9..c83589181747094 100644 --- a/libcxx/include/__support/xlocale/__posix_l_fallback.h +++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h @@ -25,24 +25,10 @@ # include #endif -inline _LIBCPP_HIDE_FROM_ABI int isalnum_l(int __c, locale_t) { return ::isalnum(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int isalpha_l(int __c, locale_t) { return ::isalpha(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int iscntrl_l(int __c, locale_t) { return ::iscntrl(__c); } - inline _LIBCPP_HIDE_FROM_ABI int isdigit_l(int __c, locale_t) { return ::isdigit(__c); } -inline _LIBCPP_HIDE_FROM_ABI int isgraph_l(int __c, locale_t) { return ::isgraph(__c); } - inline _LIBCPP_HIDE_FROM_ABI int islower_l(int __c, locale_t) { return 
::islower(__c); } -inline _LIBCPP_HIDE_FROM_ABI int isprint_l(int __c, locale_t) { return ::isprint(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int ispunct_l(int __c, locale_t) { return ::ispunct(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int isspace_l(int __c, locale_t) { return ::isspace(__c); } - inline _LIBCPP_HIDE_FROM_ABI int isupper_l(int __c, locale_t) { return ::isupper(__c); } inline _LIBCPP_HIDE_FROM_ABI int isxdigit_l(int __c, locale_t) { return ::isxdigit(__c); } @@ -52,8 +38,6 @@ inline _LIBCPP_HIDE_FROM_ABI int toupper_l(int __c, locale_t) { return ::toupper inline _LIBCPP_HIDE_FROM_ABI int tolower_l(int __c, locale_t) { return ::tolower(__c); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI int iswalnum_l(wint_t __c, locale_t) { return ::iswalnum(__c); } - inline _LIBCPP_HIDE_FROM_ABI int iswalpha_l(wint_t __c, locale_t) { return ::iswalpha(__c); } inline _LIBCPP_HIDE_FROM_ABI int iswblank_l(wint_t __c, locale_t) { return ::iswblank(__c); } @@ -62,8 +46,6 @@ inline _LIBCPP_HIDE_FROM_ABI int iswcntrl_l(wint_t __c, locale_t) { return ::isw inline _LIBCPP_HIDE_FROM_ABI int iswdigit_l(wint_t __c, locale_t) { return ::iswdigit(__c); } -inline _LIBCPP_HIDE_FROM_ABI int iswgraph_l(wint_t __c, locale_t) { return ::iswgraph(__c); } - inline _LIBCPP_HIDE_FROM_ABI int iswlower_l(wint_t __c, locale_t) { return ::iswlower(__c); } inline _LIBCPP_HIDE_FROM_ABI int iswprint_l(wint_t __c, locale_t) { return ::iswprint(__c); } From 4ac0e7e400fe2a66d1fd5d5d1fa1c899dfb16716 Mon Sep 17 00:00:00 2001 From: Gang Chen Date: Fri, 25 Oct 2024 11:24:47 -0700 Subject: [PATCH 29/39] [AMDGPU] Add a type for the named barrier (#113614) --- clang/include/clang/Basic/AMDGPUTypes.def | 8 ++++ clang/lib/CodeGen/CGDebugInfo.cpp | 7 ++++ clang/lib/CodeGen/CodeGenTypes.cpp | 4 ++ clang/test/AST/ast-dump-amdgpu-types.c | 13 ++++-- .../CodeGen/amdgpu-barrier-type-debug-info.c | 8 ++++ .../CodeGenCXX/amdgpu-barrier-typeinfo.cpp | 10 +++++ 
clang/test/CodeGenHIP/amdgpu-barrier-type.hip | 42 +++++++++++++++++++ clang/test/SemaCXX/amdgpu-barrier.cpp | 17 ++++++++ clang/test/SemaHIP/amdgpu-barrier.hip | 20 +++++++++ clang/test/SemaOpenCL/amdgpu-barrier.cl | 12 ++++++ clang/test/SemaOpenMP/amdgpu-barrier.cpp | 17 ++++++++ llvm/lib/IR/Type.cpp | 14 +++++++ 12 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/amdgpu-barrier-type-debug-info.c create mode 100644 clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp create mode 100644 clang/test/CodeGenHIP/amdgpu-barrier-type.hip create mode 100644 clang/test/SemaCXX/amdgpu-barrier.cpp create mode 100644 clang/test/SemaHIP/amdgpu-barrier.hip create mode 100644 clang/test/SemaOpenCL/amdgpu-barrier.cl create mode 100644 clang/test/SemaOpenMP/amdgpu-barrier.cpp diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def index e47e544fdc82c1c..d3dff446f9edf01 100644 --- a/clang/include/clang/Basic/AMDGPUTypes.def +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -15,7 +15,15 @@ AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) #endif +#ifndef AMDGPU_NAMED_BARRIER_TYPE +#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) +#endif + AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8) +AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0) + #undef AMDGPU_TYPE #undef AMDGPU_OPAQUE_PTR_TYPE +#undef AMDGPU_NAMED_BARRIER_TYPE diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 59a761c2303c951..5fd6cfa63e6efab 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -916,6 +916,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { TheCU, TheCU->getFile(), 0); \ return SingletonId; \ } +#define 
AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + case BuiltinType::Id: { \ + if (!SingletonId) \ + SingletonId = \ + DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \ + return SingletonId; \ + } #include "clang/Basic/AMDGPUTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index f87184fc77832ca..09191a4901f4932 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -564,6 +564,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \ case BuiltinType::Id: \ return llvm::PointerType::get(getLLVMContext(), AS); +#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + case BuiltinType::Id: \ + return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \ + {}, {Scope}); #include "clang/Basic/AMDGPUTypes.def" #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/HLSLIntangibleTypes.def" diff --git a/clang/test/AST/ast-dump-amdgpu-types.c b/clang/test/AST/ast-dump-amdgpu-types.c index e032d678f1a09e8..f01461cdba2374e 100644 --- a/clang/test/AST/ast-dump-amdgpu-types.c +++ b/clang/test/AST/ast-dump-amdgpu-types.c @@ -1,10 +1,15 @@ // REQUIRES: amdgpu-registered-target // Test without serialization: -// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s -check-prefix=BUFFER-RSRC +// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_named_workgroup_barrier %s | FileCheck %s -check-prefix=WORKGROUP-BARRIER // // Test with serialization: // RUN: %clang_cc1 -triple amdgcn -emit-pch -o %t %s -// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter 
__amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s +// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s -check-prefix=BUFFER-RSRC +// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_named_workgroup_barrier /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s -check-prefix=WORKGROUP-BARRIER -// CHECK: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t -// CHECK-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t' +// BUFFER-RSRC: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t +// BUFFER-RSRC-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t' + +// WORKGROUP-BARRIER: TypedefDecl {{.*}} implicit __amdgpu_named_workgroup_barrier_t +// WORKGROUP-BARRIER-NEXT: -BuiltinType {{.*}} '__amdgpu_named_workgroup_barrier_t' diff --git a/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c new file mode 100644 index 000000000000000..f595f1b222c4f65 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c @@ -0,0 +1,8 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +// CHECK: name: "__amdgpu_named_workgroup_barrier_t",{{.*}}baseType: ![[BT:[0-9]+]] +// CHECK: [[BT]] = !DIBasicType(name: "__amdgpu_named_workgroup_barrier_t", size: 128, encoding: DW_ATE_unsigned) +void test_locals(void) { + __amdgpu_named_workgroup_barrier_t k0; +} diff --git a/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp new file mode 100644 index 000000000000000..a47f217dcd3db67 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp @@ -0,0 +1,10 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s + +namespace std { class 
type_info; }; + +auto &b0 = typeid(__amdgpu_named_workgroup_barrier_t); + +// CHECK-DAG: @_ZTSu34__amdgpu_named_workgroup_barrier_t = {{.*}} c"u34__amdgpu_named_workgroup_barrier_t\00" +// CHECK-DAG: @_ZTIu34__amdgpu_named_workgroup_barrier_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu34__amdgpu_named_workgroup_barrier_t + diff --git a/clang/test/CodeGenHIP/amdgpu-barrier-type.hip b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip new file mode 100644 index 000000000000000..229e8b3c737c6aa --- /dev/null +++ b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature + // REQUIRES: amdgpu-registered-target + // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +#define __shared__ __attribute__((shared)) + +__shared__ __amdgpu_named_workgroup_barrier_t bar; +__shared__ __amdgpu_named_workgroup_barrier_t arr[2]; +__shared__ struct { + __amdgpu_named_workgroup_barrier_t x; + __amdgpu_named_workgroup_barrier_t y; +} str; + +__amdgpu_named_workgroup_barrier_t *getBar(); +void useBar(__amdgpu_named_workgroup_barrier_t *); + +// CHECK-LABEL: define {{[^@]+}}@_Z7testSemPu34__amdgpu_named_workgroup_barrier_t +// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void 
@_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef addrspacecast (ptr addrspace(1) @bar to ptr)) #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds ([2 x target("amdgcn.named.barrier", 0)], ptr addrspacecast (ptr addrspace(1) @arr to ptr), i64 0, i64 1)) #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds nuw ([[STRUCT_ANON:%.*]], ptr addrspacecast (ptr addrspace(1) @str to ptr), i32 0, i32 1)) #[[ATTR2]] +// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[CALL]]) #[[ATTR2]] +// CHECK-NEXT: [[CALL1:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]] +// CHECK-NEXT: ret ptr [[CALL1]] +// +__amdgpu_named_workgroup_barrier_t *testSem(__amdgpu_named_workgroup_barrier_t *p) { + useBar(p); + useBar(&bar); + useBar(&arr[1]); + useBar(&str.y); + useBar(getBar()); + return getBar(); +} diff --git a/clang/test/SemaCXX/amdgpu-barrier.cpp b/clang/test/SemaCXX/amdgpu-barrier.cpp new file mode 100644 index 000000000000000..a171433727dda41 --- /dev/null +++ b/clang/test/SemaCXX/amdgpu-barrier.cpp @@ -0,0 +1,17 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + 
reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment"); diff --git a/clang/test/SemaHIP/amdgpu-barrier.hip b/clang/test/SemaHIP/amdgpu-barrier.hip new file mode 100644 index 000000000000000..ccd99b1e2c1f261 --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-barrier.hip @@ -0,0 +1,20 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s + +#define __device__ __attribute__((device)) + +__device__ void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} 
+ int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment"); diff --git a/clang/test/SemaOpenCL/amdgpu-barrier.cl b/clang/test/SemaOpenCL/amdgpu-barrier.cl new file mode 100644 index 000000000000000..150c311c7c59303 --- /dev/null +++ b/clang/test/SemaOpenCL/amdgpu-barrier.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -Wno-unused-value %s +// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{initializing '__private __amdgpu_named_workgroup_barrier_t' with an expression of incompatible type 'int'}} + int c = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}} + void *vp = (void *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}} + } diff --git a/clang/test/SemaOpenMP/amdgpu-barrier.cpp b/clang/test/SemaOpenMP/amdgpu-barrier.cpp new file mode 100644 index 000000000000000..70aaefd080885e6 --- /dev/null +++ b/clang/test/SemaOpenMP/amdgpu-barrier.cpp @@ -0,0 +1,17 @@ +// REQUIRES: 
amdgpu-registered-target +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s + +void foo() { +#pragma omp target + { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} + } + } diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 912b1a3960ef196..e311cde415174a9 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -834,6 +834,14 @@ Expected TargetExtType::checkParams(TargetExtType *TTy) { "target extension type riscv.vector.tuple should have one " "type parameter and one integer parameter"); + // Opaque types in the AMDGPU name space. 
+ if (TTy->Name == "amdgcn.named.barrier" && + (TTy->getNumTypeParameters() != 0 || TTy->getNumIntParameters() != 1)) { + return createStringError("target extension type amdgcn.named.barrier " + "should have no type parameters " + "and one integer parameter"); + } + return TTy; } @@ -879,6 +887,12 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { if (Name.starts_with("dx.")) return TargetTypeInfo(PointerType::get(C, 0)); + // Opaque types in the AMDGPU name space. + if (Name == "amdgcn.named.barrier") { + return TargetTypeInfo(FixedVectorType::get(Type::getInt32Ty(C), 4), + TargetExtType::CanBeGlobal); + } + return TargetTypeInfo(Type::getVoidTy(C)); } From 61946687bc68ccba763571cb420049b9a3749dfe Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 25 Oct 2024 11:33:44 -0700 Subject: [PATCH 30/39] [clang][modules] Shrink the size of `Module::Headers` (#113395) This patch shrinks the size of the `Module` class from 2112B to 1624B. I wasn't able to get a good data on the actual impact on memory usage, but given my `clang-scan-deps` workload at hand (with tens of thousands of instances), I think there should be some win here. This also speeds up my benchmark by under 0.1%. 
--- .../modularize/CoverageChecker.cpp | 7 ++--- .../modularize/ModularizeUtilities.cpp | 14 ++------- clang/include/clang/Basic/Module.h | 31 ++++++++++++++----- clang/lib/Basic/Module.cpp | 2 +- clang/lib/Frontend/FrontendAction.cpp | 2 +- clang/lib/Lex/ModuleMap.cpp | 21 +++++++------ clang/lib/Serialization/ASTWriter.cpp | 4 +-- 7 files changed, 45 insertions(+), 36 deletions(-) diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp index 0e76c539aa3c839..b536ee00497c03f 100644 --- a/clang-tools-extra/modularize/CoverageChecker.cpp +++ b/clang-tools-extra/modularize/CoverageChecker.cpp @@ -223,10 +223,9 @@ bool CoverageChecker::collectModuleHeaders(const Module &Mod) { return false; } - for (auto &HeaderKind : Mod.Headers) - for (auto &Header : HeaderKind) - ModuleMapHeadersSet.insert( - ModularizeUtilities::getCanonicalPath(Header.Entry.getName())); + for (const auto &Header : Mod.getAllHeaders()) + ModuleMapHeadersSet.insert( + ModularizeUtilities::getCanonicalPath(Header.Entry.getName())); for (auto *Submodule : Mod.submodules()) collectModuleHeaders(*Submodule); diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp index b202b3aae8f8a3a..476e13770a94f6c 100644 --- a/clang-tools-extra/modularize/ModularizeUtilities.cpp +++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp @@ -358,7 +358,7 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { } else if (std::optional UmbrellaDir = Mod.getUmbrellaDirAsWritten()) { // If there normal headers, assume these are umbrellas and skip collection. - if (Mod.Headers->size() == 0) { + if (Mod.getHeaders(Module::HK_Normal).empty()) { // Collect headers in umbrella directory. 
if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(), UmbrellaDependents)) @@ -371,16 +371,8 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { // modules or because they are meant to be included by another header, // and thus should be ignored by modularize. - int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); - - for (int Index = 0; Index < NormalHeaderCount; ++Index) { - DependentsVector NormalDependents; - // Collect normal header. - const clang::Module::Header &Header( - Mod.Headers[clang::Module::HK_Normal][Index]); - std::string HeaderPath = getCanonicalPath(Header.Entry.getName()); - HeaderFileNames.push_back(HeaderPath); - } + for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal)) + HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName())); int MissingCountThisModule = Mod.MissingHeaders.size(); diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 9c5d33fbb562cc9..1ab3b5e5f81567f 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -253,8 +253,6 @@ class alignas(8) Module { HK_PrivateTextual, HK_Excluded }; - static const int NumHeaderKinds = HK_Excluded + 1; - /// Information about a header directive as found in the module map /// file. struct Header { @@ -263,17 +261,36 @@ class alignas(8) Module { FileEntryRef Entry; }; - /// Information about a directory name as found in the module map - /// file. +private: + static const int NumHeaderKinds = HK_Excluded + 1; + // The begin index for a HeaderKind also acts the end index of HeaderKind - 1. + // The extra element at the end acts as the end index of the last HeaderKind. + unsigned HeaderKindBeginIndex[NumHeaderKinds + 1] = {}; + SmallVector HeadersStorage; + +public: + ArrayRef
getAllHeaders() const { return HeadersStorage; } + ArrayRef
getHeaders(HeaderKind HK) const { + assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind"); + auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK]; + auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1]; + return {BeginIt, EndIt}; + } + void addHeader(HeaderKind HK, Header H) { + assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind"); + auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1]; + HeadersStorage.insert(EndIt, std::move(H)); + for (unsigned HKI = HK + 1; HKI != NumHeaderKinds + 1; ++HKI) + ++HeaderKindBeginIndex[HKI]; + } + + /// Information about a directory name as found in the module map file. struct DirectoryName { std::string NameAsWritten; std::string PathRelativeToRootModuleDirectory; DirectoryEntryRef Entry; }; - /// The headers that are part of this module. - SmallVector Headers[5]; - /// Stored information about a header directive that was found in the /// module map file but has not been resolved to a file. struct UnresolvedHeaderDirective { diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index ad52fccff5dc7ff..a7a3f6b37efef17 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -528,7 +528,7 @@ void Module::print(raw_ostream &OS, unsigned Indent, bool Dump) const { for (auto &K : Kinds) { assert(&K == &Kinds[K.Kind] && "kinds in wrong order"); - for (auto &H : Headers[K.Kind]) { + for (auto &H : getHeaders(K.Kind)) { OS.indent(Indent + 2); OS << K.Prefix << "header \""; OS.write_escaped(H.NameAsWritten); diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 81eea9c4c4dc58e..8264bd702fe43fb 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes( // Add includes for each of these headers. 
for (auto HK : {Module::HK_Normal, Module::HK_Private}) { - for (Module::Header &H : Module->Headers[HK]) { + for (const Module::Header &H : Module->getHeaders(HK)) { Module->addTopHeader(H.Entry); // Use the path as specified in the module map file. We'll look for this // file relative to the module build directory (the directory containing diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 0a02a63deba3dc1..bc76a54abd95adf 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule, // as obtained from the lookup and as obtained from the module. // This check is not cheap, so enable it only for debugging. bool IsPrivate = false; - SmallVectorImpl *HeaderList[] = { - &Header.getModule()->Headers[Module::HK_Private], - &Header.getModule()->Headers[Module::HK_PrivateTextual]}; - for (auto *Hs : HeaderList) + ArrayRef HeaderList[] = { + Header.getModule()->getHeaders(Module::HK_Private), + Header.getModule()->getHeaders(Module::HK_PrivateTextual)}; + for (auto Hs : HeaderList) IsPrivate |= llvm::any_of( - *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; }); + Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; }); assert(IsPrivate && "inconsistent headers and roles"); } #endif @@ -1296,27 +1296,28 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header, ModuleHeaderRole Role, bool Imported) { KnownHeader KH(Mod, Role); + FileEntryRef HeaderEntry = Header.Entry; + // Only add each header to the headers list once. // FIXME: Should we diagnose if a header is listed twice in the // same module definition? 
- auto &HeaderList = Headers[Header.Entry]; + auto &HeaderList = Headers[HeaderEntry]; if (llvm::is_contained(HeaderList, KH)) return; HeaderList.push_back(KH); - Mod->Headers[headerRoleToKind(Role)].push_back(Header); + Mod->addHeader(headerRoleToKind(Role), std::move(Header)); bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts); if (!Imported || isCompilingModuleHeader) { // When we import HeaderFileInfo, the external source is expected to // set the isModuleHeader flag itself. - HeaderInfo.MarkFileModuleHeader(Header.Entry, Role, - isCompilingModuleHeader); + HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role, isCompilingModuleHeader); } // Notify callbacks that we just added a new header. for (const auto &Cb : Callbacks) - Cb->moduleMapAddHeader(Header.Entry.getName()); + Cb->moduleMapAddHeader(HeaderEntry.getName()); } FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 494890284d2f2c1..b576822fa704c89 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3070,9 +3070,9 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Module::HK_PrivateTextual}, {SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded} }; - for (auto &HL : HeaderLists) { + for (const auto &HL : HeaderLists) { RecordData::value_type Record[] = {HL.RecordKind}; - for (auto &H : Mod->Headers[HL.HeaderKind]) + for (const auto &H : Mod->getHeaders(HL.HeaderKind)) Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten); } From 9648271a3c5adf875680833ac74eb4bafb48678d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 25 Oct 2024 20:39:45 +0200 Subject: [PATCH 31/39] [LV] Pass flag indicating epilogue is vectorized to executePlan (NFC) This clarifies the flag, which is now only passed if the epilogue loop is being vectorized. 
--- .../Vectorize/LoopVectorizationPlanner.h | 8 ++++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 1c8d541ef2c51fd..b2745c81dec8885 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -435,9 +435,9 @@ class LoopVectorizationPlanner { /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan /// according to the best selected \p VF and \p UF. /// - /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue - /// vectorization re-using plans for both the main and epilogue vector loops. - /// It should be removed once the re-use issue has been fixed. + /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the + /// epilogue vector loop. It should be removed once the re-use issue has been + /// fixed. /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop /// to re-use expansion results generated during main plan execution. 
/// @@ -447,7 +447,7 @@ class LoopVectorizationPlanner { DenseMap executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, - bool IsEpilogueVectorization, + bool VectorizingEpilogue, const DenseMap *ExpandedSCEVs = nullptr); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e1173ddd71af9c5..865f5e3d2e588da 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7626,16 +7626,16 @@ static void createAndCollectMergePhiForReduction( DenseMap LoopVectorizationPlanner::executePlan( ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, - InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization, + InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue, const DenseMap *ExpandedSCEVs) { assert(BestVPlan.hasVF(BestVF) && "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && "Trying to execute plan with unsupported UF"); assert( - (IsEpilogueVectorization || !ExpandedSCEVs) && + ((VectorizingEpilogue && ExpandedSCEVs) || + (!VectorizingEpilogue && !ExpandedSCEVs)) && "expanded SCEVs to reuse can only be used during epilogue vectorization"); - (void)IsEpilogueVectorization; // TODO: Move to VPlan transform stage once the transition to the VPlan-based // cost model is complete for better cost estimates. @@ -7661,8 +7661,8 @@ DenseMap LoopVectorizationPlanner::executePlan( if (!ILV.getTripCount()) ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0))); else - assert(IsEpilogueVectorization && "should only re-use the existing trip " - "count during epilogue vectorization"); + assert(VectorizingEpilogue && "should only re-use the existing trip " + "count during epilogue vectorization"); // 1. Set up the skeleton for vectorization, including vector pre-header and // middle block. 
The vector loop is created during VPlan execution. @@ -7715,7 +7715,7 @@ DenseMap LoopVectorizationPlanner::executePlan( for (VPRecipeBase &R : *ExitVPBB) { createAndCollectMergePhiForReduction( dyn_cast(&R), State, OrigLoop, - State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); + State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue); } // 2.6. Maintain Loop Hints @@ -10233,7 +10233,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { std::unique_ptr BestMainPlan(BestPlan.duplicate()); auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, - *BestMainPlan, MainILV, DT, true); + *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; // Second pass vectorizes the epilogue and adjusts the control flow From 8c4bc1e75de27adfbaead34b895b0efbaf17bd02 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 25 Oct 2024 11:44:20 -0700 Subject: [PATCH 32/39] [mlir][Transforms] Merge 1:1 and 1:N type converters (#113032) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1:N type converter derived from the 1:1 type converter and extends it with 1:N target materializations. This commit merges the two type converters and stores 1:N target materializations in the 1:1 type converter. This is in preparation of merging the 1:1 and 1:N dialect conversion infrastructures. 1:1 target materializations (producing a single `Value`) will remain valid. An additional API is added to the type converter to register 1:N target materializations (producing a `SmallVector`). Internally, all target materializations are stored as 1:N materializations. The 1:N type converter is removed. Note for LLVM integration: If you are using the `OneToNTypeConverter`, simply switch all occurrences to `TypeConverter`. 
--------- Co-authored-by: Markus Böck --- .../Dialect/SparseTensor/Transforms/Passes.h | 2 +- .../mlir/Transforms/DialectConversion.h | 62 ++++++++++++++----- .../mlir/Transforms/OneToNTypeConversion.h | 45 +------------- .../ArmSME/Transforms/VectorLegalization.cpp | 2 +- .../Transforms/Utils/DialectConversion.cpp | 26 ++++++-- .../Transforms/Utils/OneToNTypeConversion.cpp | 44 +++++-------- .../TestOneToNTypeConversionPass.cpp | 18 ++++-- 7 files changed, 101 insertions(+), 98 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index 6ccbc40bdd6034a..2e9c297f20182af 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -150,7 +150,7 @@ std::unique_ptr createLowerForeachToSCFPass(); //===----------------------------------------------------------------------===// /// Type converter for iter_space and iterator. -struct SparseIterationTypeConverter : public OneToNTypeConverter { +struct SparseIterationTypeConverter : public TypeConverter { SparseIterationTypeConverter(); }; diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 5ff36160dd61620..5e5957170e646c3 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -173,7 +173,9 @@ class TypeConverter { /// conversion has finished. /// /// Note: Target materializations may optionally accept an additional Type - /// parameter, which is the original type of the SSA value. + /// parameter, which is the original type of the SSA value. Furthermore, `T` + /// can be a TypeRange; in that case, the function must return a + /// SmallVector. 
/// This method registers a materialization that will be called when /// converting (potentially multiple) block arguments that were the result of @@ -210,6 +212,9 @@ class TypeConverter { /// will be invoked with: outputType = "t3", inputs = "v2", // originalType = "t1". Note that the original type "t1" cannot be recovered /// from just "t3" and "v2"; that's why the originalType parameter exists. + /// + /// Note: During a 1:N conversion, the result types can be a TypeRange. In + /// that case the materialization produces a SmallVector. template >::template arg_t<1>> void addTargetMaterialization(FnT &&callback) { @@ -316,6 +321,11 @@ class TypeConverter { Value materializeTargetConversion(OpBuilder &builder, Location loc, Type resultType, ValueRange inputs, Type originalType = {}) const; + SmallVector materializeTargetConversion(OpBuilder &builder, + Location loc, + TypeRange resultType, + ValueRange inputs, + Type originalType = {}) const; /// Convert an attribute present `attr` from within the type `type` using /// the registered conversion functions. If no applicable conversion has been @@ -340,9 +350,9 @@ class TypeConverter { /// The signature of the callback used to materialize a target conversion. /// - /// Arguments: builder, result type, inputs, location, original type - using TargetMaterializationCallbackFn = - std::function; + /// Arguments: builder, result types, inputs, location, original type + using TargetMaterializationCallbackFn = std::function( + OpBuilder &, TypeRange, ValueRange, Location, Type)>; /// The signature of the callback used to convert a type attribute. using TypeAttributeConversionCallbackFn = @@ -409,22 +419,46 @@ class TypeConverter { /// callback. 
/// /// With callback of form: - /// `Value(OpBuilder &, T, ValueRange, Location, Type)` + /// - Value(OpBuilder &, T, ValueRange, Location, Type) + /// - SmallVector(OpBuilder &, TypeRange, ValueRange, Location, Type) template std::enable_if_t< std::is_invocable_v, TargetMaterializationCallbackFn> wrapTargetMaterialization(FnT &&callback) const { return [callback = std::forward(callback)]( - OpBuilder &builder, Type resultType, ValueRange inputs, - Location loc, Type originalType) -> Value { - if (T derivedType = dyn_cast(resultType)) - return callback(builder, derivedType, inputs, loc, originalType); - return Value(); + OpBuilder &builder, TypeRange resultTypes, ValueRange inputs, + Location loc, Type originalType) -> SmallVector { + SmallVector result; + if constexpr (std::is_same::value) { + // This is a 1:N target materialization. Return the produces values + // directly. + result = callback(builder, resultTypes, inputs, loc, originalType); + } else if constexpr (std::is_assignable::value) { + // This is a 1:1 target materialization. Invoke the callback only if a + // single SSA value is requested. + if (resultTypes.size() == 1) { + // Invoke the callback only if the type class of the callback matches + // the requested result type. + if (T derivedType = dyn_cast(resultTypes.front())) { + // 1:1 materializations produce single values, but we store 1:N + // target materialization functions in the type converter. Wrap the + // result value in a SmallVector. 
+ Value val = + callback(builder, derivedType, inputs, loc, originalType); + if (val) + result.push_back(val); + } + } + } else { + static_assert(sizeof(T) == 0, "T must be a Type or a TypeRange"); + } + return result; }; } /// With callback of form: - /// `Value(OpBuilder &, T, ValueRange, Location)` + /// - Value(OpBuilder &, T, ValueRange, Location) + /// - SmallVector(OpBuilder &, TypeRange, ValueRange, Location) template std::enable_if_t< std::is_invocable_v, @@ -432,9 +466,9 @@ class TypeConverter { wrapTargetMaterialization(FnT &&callback) const { return wrapTargetMaterialization( [callback = std::forward(callback)]( - OpBuilder &builder, T resultType, ValueRange inputs, Location loc, - Type originalType) -> Value { - return callback(builder, resultType, inputs, loc); + OpBuilder &builder, T resultTypes, ValueRange inputs, Location loc, + Type originalType) { + return callback(builder, resultTypes, inputs, loc); }); } diff --git a/mlir/include/mlir/Transforms/OneToNTypeConversion.h b/mlir/include/mlir/Transforms/OneToNTypeConversion.h index c59a3a52f028f32..7b4dd65cbff7b2d 100644 --- a/mlir/include/mlir/Transforms/OneToNTypeConversion.h +++ b/mlir/include/mlir/Transforms/OneToNTypeConversion.h @@ -33,49 +33,6 @@ namespace mlir { -/// Extends `TypeConverter` with 1:N target materializations. Such -/// materializations have to provide the "reverse" of 1:N type conversions, -/// i.e., they need to materialize N values with target types into one value -/// with a source type (which isn't possible in the base class currently). -class OneToNTypeConverter : public TypeConverter { -public: - /// Callback that expresses user-provided materialization logic from the given - /// value to N values of the given types. This is useful for expressing target - /// materializations for 1:N type conversions, which materialize one value in - /// a source type as N values in target types. 
- using OneToNMaterializationCallbackFn = - std::function>(OpBuilder &, TypeRange, - Value, Location)>; - - /// Creates the mapping of the given range of original types to target types - /// of the conversion and stores that mapping in the given (signature) - /// conversion. This function simply calls - /// `TypeConverter::convertSignatureArgs` and exists here with a different - /// name to reflect the broader semantic. - LogicalResult computeTypeMapping(TypeRange types, - SignatureConversion &result) const { - return convertSignatureArgs(types, result); - } - - /// Applies one of the user-provided 1:N target materializations. If several - /// exists, they are tried out in the reverse order in which they have been - /// added until the first one succeeds. If none succeeds, the functions - /// returns `std::nullopt`. - std::optional> - materializeTargetConversion(OpBuilder &builder, Location loc, - TypeRange resultTypes, Value input) const; - - /// Adds a 1:N target materialization to the converter. Such materializations - /// build IR that converts N values with target types into 1 value of the - /// source type. - void addTargetMaterialization(OneToNMaterializationCallbackFn &&callback) { - oneToNTargetMaterializations.emplace_back(std::move(callback)); - } - -private: - SmallVector oneToNTargetMaterializations; -}; - /// Stores a 1:N mapping of types and provides several useful accessors. This /// class extends `SignatureConversion`, which already supports 1:N type /// mappings but lacks some accessors into the mapping as well as access to the @@ -295,7 +252,7 @@ class OneToNOpConversionPattern : public OneToNConversionPattern { /// not fail if some ops or types remain unconverted (i.e., the conversion is /// only "partial"). 
LogicalResult -applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, +applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter, const FrozenRewritePatternSet &patterns); /// Add a pattern to the given pattern list to convert the signature of a diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp index 4968c4fc463d04b..e908a536e6fb271 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp @@ -921,7 +921,7 @@ struct VectorLegalizationPass : public arm_sme::impl::VectorLegalizationBase { void runOnOperation() override { auto *context = &getContext(); - OneToNTypeConverter converter; + TypeConverter converter; RewritePatternSet patterns(context); converter.addConversion([](Type type) { return type; }); converter.addConversion( diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 3cfcaa965f3546a..3d0c81867e0cc26 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2831,11 +2831,29 @@ Value TypeConverter::materializeTargetConversion(OpBuilder &builder, Location loc, Type resultType, ValueRange inputs, Type originalType) const { + SmallVector result = materializeTargetConversion( + builder, loc, TypeRange(resultType), inputs, originalType); + if (result.empty()) + return nullptr; + assert(result.size() == 1 && "expected single result"); + return result.front(); +} + +SmallVector TypeConverter::materializeTargetConversion( + OpBuilder &builder, Location loc, TypeRange resultTypes, ValueRange inputs, + Type originalType) const { for (const TargetMaterializationCallbackFn &fn : - llvm::reverse(targetMaterializations)) - if (Value result = fn(builder, resultType, inputs, loc, originalType)) - return result; - return nullptr; + 
llvm::reverse(targetMaterializations)) { + SmallVector result = + fn(builder, resultTypes, inputs, loc, originalType); + if (result.empty()) + continue; + assert(TypeRange(result) == resultTypes && + "callback produced incorrect number of values or values with " + "incorrect types"); + return result; + } + return {}; } std::optional diff --git a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp index 19e29d48623e04c..c208716891ef1f4 100644 --- a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp +++ b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp @@ -17,20 +17,6 @@ using namespace llvm; using namespace mlir; -std::optional> -OneToNTypeConverter::materializeTargetConversion(OpBuilder &builder, - Location loc, - TypeRange resultTypes, - Value input) const { - for (const OneToNMaterializationCallbackFn &fn : - llvm::reverse(oneToNTargetMaterializations)) { - if (std::optional> result = - fn(builder, resultTypes, input, loc)) - return *result; - } - return std::nullopt; -} - TypeRange OneToNTypeMapping::getConvertedTypes(unsigned originalTypeNo) const { TypeRange convertedTypes = getConvertedTypes(); if (auto mapping = getInputMapping(originalTypeNo)) @@ -268,20 +254,20 @@ Block *OneToNPatternRewriter::applySignatureConversion( LogicalResult OneToNConversionPattern::matchAndRewrite(Operation *op, PatternRewriter &rewriter) const { - auto *typeConverter = getTypeConverter(); + auto *typeConverter = getTypeConverter(); // Construct conversion mapping for results. Operation::result_type_range originalResultTypes = op->getResultTypes(); OneToNTypeMapping resultMapping(originalResultTypes); - if (failed(typeConverter->computeTypeMapping(originalResultTypes, - resultMapping))) + if (failed(typeConverter->convertSignatureArgs(originalResultTypes, + resultMapping))) return failure(); // Construct conversion mapping for operands. 
Operation::operand_type_range originalOperandTypes = op->getOperandTypes(); OneToNTypeMapping operandMapping(originalOperandTypes); - if (failed(typeConverter->computeTypeMapping(originalOperandTypes, - operandMapping))) + if (failed(typeConverter->convertSignatureArgs(originalOperandTypes, + operandMapping))) return failure(); // Cast operands to target types. @@ -318,7 +304,7 @@ namespace mlir { // inserted by this pass are annotated with a string attribute that also // documents which kind of the cast (source, argument, or target). LogicalResult -applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, +applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter, const FrozenRewritePatternSet &patterns) { #ifndef NDEBUG // Remember existing unrealized casts. This data structure is only used in @@ -370,15 +356,13 @@ applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, // Target materialization. assert(!areOperandTypesLegal && areResultsTypesLegal && operands.size() == 1 && "found unexpected target cast"); - std::optional> maybeResults = - typeConverter.materializeTargetConversion( - rewriter, castOp->getLoc(), resultTypes, operands.front()); - if (!maybeResults) { + materializedResults = typeConverter.materializeTargetConversion( + rewriter, castOp->getLoc(), resultTypes, operands.front()); + if (materializedResults.empty()) { emitError(castOp->getLoc()) << "failed to create target materialization"; return failure(); } - materializedResults = maybeResults.value(); } else { // Source and argument materializations. 
assert(areOperandTypesLegal && !areResultsTypesLegal && @@ -427,18 +411,18 @@ class FunctionOpInterfaceSignatureConversion : public OneToNConversionPattern { const OneToNTypeMapping &resultMapping, ValueRange convertedOperands) const override { auto funcOp = cast(op); - auto *typeConverter = getTypeConverter(); + auto *typeConverter = getTypeConverter(); // Construct mapping for function arguments. OneToNTypeMapping argumentMapping(funcOp.getArgumentTypes()); - if (failed(typeConverter->computeTypeMapping(funcOp.getArgumentTypes(), - argumentMapping))) + if (failed(typeConverter->convertSignatureArgs(funcOp.getArgumentTypes(), + argumentMapping))) return failure(); // Construct mapping for function results. OneToNTypeMapping funcResultMapping(funcOp.getResultTypes()); - if (failed(typeConverter->computeTypeMapping(funcOp.getResultTypes(), - funcResultMapping))) + if (failed(typeConverter->convertSignatureArgs(funcOp.getResultTypes(), + funcResultMapping))) return failure(); // Nothing to do if the op doesn't have any non-identity conversions for its diff --git a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp index 5c03ac12d1e58ce..b18dfd8bb22cb15 100644 --- a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp +++ b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp @@ -147,9 +147,14 @@ populateDecomposeTuplesTestPatterns(const TypeConverter &typeConverter, /// /// This function has been copied (with small adaptions) from /// TestDecomposeCallGraphTypes.cpp. 
-static std::optional> -buildGetTupleElementOps(OpBuilder &builder, TypeRange resultTypes, Value input, - Location loc) { +static SmallVector buildGetTupleElementOps(OpBuilder &builder, + TypeRange resultTypes, + ValueRange inputs, + Location loc) { + if (inputs.size() != 1) + return {}; + Value input = inputs.front(); + TupleType inputType = dyn_cast(input.getType()); if (!inputType) return {}; @@ -222,7 +227,7 @@ void TestOneToNTypeConversionPass::runOnOperation() { auto *context = &getContext(); // Assemble type converter. - OneToNTypeConverter typeConverter; + TypeConverter typeConverter; typeConverter.addConversion([](Type type) { return type; }); typeConverter.addConversion( @@ -234,6 +239,11 @@ void TestOneToNTypeConversionPass::runOnOperation() { typeConverter.addArgumentMaterialization(buildMakeTupleOp); typeConverter.addSourceMaterialization(buildMakeTupleOp); typeConverter.addTargetMaterialization(buildGetTupleElementOps); + // Test the other target materialization variant that takes the original type + // as additional argument. This materialization function always fails. + typeConverter.addTargetMaterialization( + [](OpBuilder &builder, TypeRange resultTypes, ValueRange inputs, + Location loc, Type originalType) -> SmallVector { return {}; }); // Assemble patterns. RewritePatternSet patterns(context); From e724226da753f10fd36fbb0ea392f04ab0fdbdab Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 25 Oct 2024 12:35:33 +0100 Subject: [PATCH 33/39] [VPlan] Return cost of 0 for VPWidenCastRecipe without underlying value. In some cases, VPWidenCastRecipes are created but not considered in the legacy cost model, including truncates/extends when evaluating a reduction in a smaller type. Return 0 for such casts for now, to avoid divergences between VPlan and legacy cost models. Fixes https://github.com/llvm/llvm-project/issues/113526. 
--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 ++ .../LoopVectorize/X86/cost-model.ll | 65 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0eb4f7c7c88cee7..2080b77157b6ca2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1524,6 +1524,11 @@ void VPWidenCastRecipe::execute(VPTransformState &State) { InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { + // TODO: In some cases, VPWidenCastRecipes are created but not considered in + // the legacy cost model, including truncates/extends when evaluating a + // reduction in a smaller type. + if (!getUnderlyingValue()) + return 0; // Computes the CastContextHint from a recipes that may access memory. auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint { if (VF.isScalar()) diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 73647919aac3602..29e54fabad0c1bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1037,6 +1037,71 @@ exit: ret i64 %red.mul } +; Test case for https://github.com/llvm/llvm-project/issues/113526. 
+define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { +; CHECK-LABEL: @narrowed_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32 +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]]) +; 
CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1 +; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]] +; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[OR_LCSSA]] +; +entry: + %conv = zext i1 %cmp to i32 + br label %loop + +loop: + %iv = phi i32 [ 1, %entry ], [ %inc, %loop ] + %or13 = phi i32 [ 0, %entry ], [ %or, %loop ] + %and = and i32 %or13, 1 + %or = or i32 %and, %conv + %inc = add i32 %iv, 1 + %ec = icmp eq i32 %iv, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %or +} + declare void @llvm.assume(i1 noundef) #0 attributes #0 = { "target-cpu"="penryn" } From 75252e29ea6a0959f3c1670e641a03fc18fc65fa Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 25 Oct 2024 12:40:59 -0700 Subject: [PATCH 34/39] [clang][serialization] Bump `NUM_PREDEF_TYPE_IDS` This fixes a build error caused by 4ac0e7e400fe2a66d1fd5d5d1fa1c899dfb16716. 
--- clang/include/clang/Serialization/ASTBitCodes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 99232fd21357904..3ddbc5fcd26c44f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 512; +const unsigned NUM_PREDEF_TYPE_IDS = 513; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. From 6c9bbbc818ae8a0d2849dbc1ebd84a220cc27d20 Mon Sep 17 00:00:00 2001 From: vporpo Date: Fri, 25 Oct 2024 12:47:19 -0700 Subject: [PATCH 35/39] [SandboxVec][Legality] Reject non-instructions (#113190) --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +++++++++- .../Vectorize/SandboxVectorizer/Legality.cpp | 18 +++++++++++++++++- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 ++++++++++++- 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index bcfafd75d4caaf5..d4b0b54375b0267 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,6 +28,7 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. 
enum class ResultReason { + NotInstructions, DiffOpcodes, DiffTypes, }; @@ -46,6 +47,8 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { + case ResultReason::NotInstructions: + return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -67,6 +70,10 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; + /// We shouldn't need copies. + LegalityResult(const LegalityResult &) = delete; + LegalityResult &operator=(const LegalityResult &) = delete; + public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -90,6 +97,7 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: + ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -138,7 +146,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. 
- LegalityResult &canVectorize(ArrayRef Bndl); + const LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 0e2cd83c37b0cd0..f1c4577cece78af 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,11 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { +#define DEBUG_TYPE "SBVec:Legality" + #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -26,7 +30,19 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { +static void dumpBndl(ArrayRef Bndl) { + for (auto *V : Bndl) + dbgs() << *V << "\n"; +} + +const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { + // If Bndl contains values other than instructions, we need to Pack. 
+ if (any_of(Bndl, [](auto *V) { return !isa(V); })) { + LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; + dumpBndl(Bndl);); + return createLegalityResult(ResultReason::NotInstructions); + } + if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index f11420e47f3e1f9..ede41cd661b559a 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - auto LegalityRes = Legality.canVectorize(Bndl); + const auto &LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 76e5a5ce5aed920..56c6bf5f1ef1f5c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,8 +52,16 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - auto Result = Legality.canVectorize({St0, St1}); + const auto &Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); + + { + // Check NotInstructions + auto &Result = Legality.canVectorize({F, St0}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::NotInstructions); + } } #ifndef NDEBUG @@ -68,6 +76,9 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); + 
EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::NotInstructions), + "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From eb9f4756bc3daaa4b19f4f46521dc05180814de4 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Fri, 25 Oct 2024 12:52:31 -0700 Subject: [PATCH 36/39] Revert "[SandboxVec][Legality] Reject non-instructions (#113190)" This reverts commit 6c9bbbc818ae8a0d2849dbc1ebd84a220cc27d20. --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +--------- .../Vectorize/SandboxVectorizer/Legality.cpp | 18 +----------------- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 +------------ 4 files changed, 4 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index d4b0b54375b0267..bcfafd75d4caaf5 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,7 +28,6 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. enum class ResultReason { - NotInstructions, DiffOpcodes, DiffTypes, }; @@ -47,8 +46,6 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { - case ResultReason::NotInstructions: - return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -70,10 +67,6 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; - /// We shouldn't need copies. 
- LegalityResult(const LegalityResult &) = delete; - LegalityResult &operator=(const LegalityResult &) = delete; - public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -97,7 +90,6 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: - ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -146,7 +138,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. - const LegalityResult &canVectorize(ArrayRef Bndl); + LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index f1c4577cece78af..0e2cd83c37b0cd0 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,15 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" -#include "llvm/SandboxIR/Instruction.h" -#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { -#define DEBUG_TYPE "SBVec:Legality" - #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -30,19 +26,7 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -static void dumpBndl(ArrayRef Bndl) { - for (auto *V : Bndl) - dbgs() << *V << "\n"; -} - -const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { - // If Bndl contains values other than instructions, we need to Pack. 
- if (any_of(Bndl, [](auto *V) { return !isa(V); })) { - LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; - dumpBndl(Bndl);); - return createLegalityResult(ResultReason::NotInstructions); - } - +LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index ede41cd661b559a..f11420e47f3e1f9 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - const auto &LegalityRes = Legality.canVectorize(Bndl); + auto LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 56c6bf5f1ef1f5c..76e5a5ce5aed920 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,16 +52,8 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - const auto &Result = Legality.canVectorize({St0, St1}); + auto Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); - - { - // Check NotInstructions - auto &Result = Legality.canVectorize({F, St0}); - EXPECT_TRUE(isa(Result)); - EXPECT_EQ(cast(Result).getReason(), - sandboxir::ResultReason::NotInstructions); - } } #ifndef NDEBUG @@ -76,9 +68,6 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( 
Matches(Legality.createLegalityResult(), "Widen")); - EXPECT_TRUE(Matches(Legality.createLegalityResult( - sandboxir::ResultReason::NotInstructions), - "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From 1540f772c793b3a29ae5d57e99456ec5d7ef4b39 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Fri, 25 Oct 2024 12:53:26 -0700 Subject: [PATCH 37/39] Reapply "[SandboxVec][Legality] Reject non-instructions (#113190)" This reverts commit eb9f4756bc3daaa4b19f4f46521dc05180814de4. --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +++++++++- .../Vectorize/SandboxVectorizer/Legality.cpp | 20 ++++++++++++++++++- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 +++++++++++- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index bcfafd75d4caaf5..d4b0b54375b0267 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,6 +28,7 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. enum class ResultReason { + NotInstructions, DiffOpcodes, DiffTypes, }; @@ -46,6 +47,8 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { + case ResultReason::NotInstructions: + return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -67,6 +70,10 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; + /// We shouldn't need copies. 
+ LegalityResult(const LegalityResult &) = delete; + LegalityResult &operator=(const LegalityResult &) = delete; + public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -90,6 +97,7 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: + ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -138,7 +146,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. - LegalityResult &canVectorize(ArrayRef Bndl); + const LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 0e2cd83c37b0cd0..e4546c2f98113ee 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,11 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { +#define DEBUG_TYPE "SBVec:Legality" + #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -26,7 +30,21 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { +#ifndef NDEBUG +static void dumpBndl(ArrayRef Bndl) { + for (auto *V : Bndl) + dbgs() << *V << "\n"; +} +#endif // NDEBUG + +const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { + // If Bndl contains values other than instructions, we need to Pack. 
+ if (any_of(Bndl, [](auto *V) { return !isa(V); })) { + LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; + dumpBndl(Bndl);); + return createLegalityResult(ResultReason::NotInstructions); + } + if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index f11420e47f3e1f9..ede41cd661b559a 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - auto LegalityRes = Legality.canVectorize(Bndl); + const auto &LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 76e5a5ce5aed920..56c6bf5f1ef1f5c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,8 +52,16 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - auto Result = Legality.canVectorize({St0, St1}); + const auto &Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); + + { + // Check NotInstructions + auto &Result = Legality.canVectorize({F, St0}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::NotInstructions); + } } #ifndef NDEBUG @@ -68,6 +76,9 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); + 
EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::NotInstructions), + "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From cfde4fbccf5d8d949a8cade0a4f8ef9b0f47ca73 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 16:46:38 -0400 Subject: [PATCH 38/39] [libc++] Remove obsolete Solaris and Newlib support for locales (#113721) The solaris header file doesn't even exist, so that's definitely dead code. The newlib header is empty, which means that localization can't work on that platform. If someone is using libc++ with Newlib, they must be providing LIBCXX_HAS_NO_LOCALIZATION today for anything to work, so that header is basically dead code as well. --- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__locale_dir/locale_base_api.h | 4 ---- libcxx/include/__locale_dir/locale_base_api/newlib.h | 12 ------------ libcxx/include/module.modulemap | 1 - 4 files changed, 18 deletions(-) delete mode 100644 libcxx/include/__locale_dir/locale_base_api/newlib.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 63aa74e09bb1a27..506ed721d0843ec 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -497,7 +497,6 @@ set(files __locale_dir/locale_base_api/fuchsia.h __locale_dir/locale_base_api/ibm.h __locale_dir/locale_base_api/musl.h - __locale_dir/locale_base_api/newlib.h __locale_dir/locale_base_api/openbsd.h __locale_dir/locale_base_api/win32.h __locale_dir/locale_guard.h diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index 8c000c558c52793..eab7fa8bf62faec 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -15,10 +15,6 @@ # include <__locale_dir/locale_base_api/ibm.h> #elif defined(__ANDROID__) # include <__locale_dir/locale_base_api/android.h> -#elif 
defined(__sun__) -# include <__locale_dir/locale_base_api/solaris.h> -#elif defined(_NEWLIB_VERSION) -# include <__locale_dir/locale_base_api/newlib.h> #elif defined(__OpenBSD__) # include <__locale_dir/locale_base_api/openbsd.h> #elif defined(__Fuchsia__) diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/newlib.h deleted file mode 100644 index 7da10e5889843dd..000000000000000 --- a/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H -#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H - -#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index c79070c318759db..f92e8bf5fc9aba5 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1451,7 +1451,6 @@ module std [system] { textual header "__locale_dir/locale_base_api/fuchsia.h" textual header "__locale_dir/locale_base_api/ibm.h" textual header "__locale_dir/locale_base_api/musl.h" - textual header "__locale_dir/locale_base_api/newlib.h" textual header "__locale_dir/locale_base_api/openbsd.h" textual header "__locale_dir/locale_base_api/win32.h" } From 1bc2cd98c58a1059170dc38697c7a29a8e21160b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 25 Oct 2024 13:52:51 -0700 Subject: [PATCH 39/39] [WebAssembly] Enable nontrapping-fptoint and bulk-memory by default. 
(#112049) We were prepared to enable these features [back in February], but they got pulled for what appear to be unrelated reasons. So let's have another try at enabling them! Another motivation here is that it'd be convenient for the [Lime1 proposal] if "lime1" is close to a subset of "generic" (missing only for extended-const). [back in February]: https://github.com/WebAssembly/tool-conventions/issues/158#issuecomment-1931119512 [Lime1 proposal]: https://github.com/llvm/llvm-project/pull/112035 --- clang/docs/ReleaseNotes.rst | 9 ++++++ clang/lib/Basic/Targets/WebAssembly.cpp | 4 +-- .../test/Preprocessor/wasm-target-features.c | 4 +-- lld/test/wasm/custom-section-name.ll | 2 +- lld/test/wasm/data-segments.ll | 2 +- lld/test/wasm/lto/Inputs/libcall-archive.ll | 4 ++- lld/test/wasm/lto/libcall-archive.ll | 4 ++- lld/test/wasm/lto/stub-library-libcall.s | 4 +-- llvm/docs/ReleaseNotes.md | 9 ++++++ llvm/lib/Target/WebAssembly/WebAssembly.td | 3 +- .../WebAssemblyFixFunctionBitcasts.cpp | 2 ++ .../WebAssembly/WebAssemblyTargetMachine.cpp | 29 +++++++++++++++---- .../WebAssembly/cfg-stackify-eh-legacy.ll | 10 +++---- .../WebAssembly/target-features-cpus.ll | 8 ++++- .../WebAssembly/extern-functype-intrinsic.ll | 4 +-- llvm/test/MC/WebAssembly/libcall.ll | 2 +- 16 files changed, 74 insertions(+), 26 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 170c4cc280537f9..6a95337815174bc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -690,6 +690,15 @@ NetBSD Support WebAssembly Support ^^^^^^^^^^^^^^^^^^^ +The default target CPU, "generic", now enables the `-mnontrapping-fptoint` +and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] +and [Non-trapping float-to-int Conversions] language features, which are +[widely implemented in engines]. 
+ +[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md +[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md +[widely implemented in engines]: https://webassembly.org/features/ + AVR Support ^^^^^^^^^^^ diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 4c9df6007b78231..0b380bdf835ffbd 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -154,20 +154,20 @@ bool WebAssemblyTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { auto addGenericFeatures = [&]() { + Features["bulk-memory"] = true; Features["multivalue"] = true; Features["mutable-globals"] = true; + Features["nontrapping-fptoint"] = true; Features["reference-types"] = true; Features["sign-ext"] = true; }; auto addBleedingEdgeFeatures = [&]() { addGenericFeatures(); Features["atomics"] = true; - Features["bulk-memory"] = true; Features["exception-handling"] = true; Features["extended-const"] = true; Features["fp16"] = true; Features["multimemory"] = true; - Features["nontrapping-fptoint"] = true; Features["tail-call"] = true; Features["wide-arithmetic"] = true; setSIMDLevel(Features, RelaxedSIMD, true); diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c index 14d2fbf4423d32b..71b7cf6a5d43cc1 100644 --- a/clang/test/Preprocessor/wasm-target-features.c +++ b/clang/test/Preprocessor/wasm-target-features.c @@ -163,8 +163,10 @@ // RUN: -target wasm64-unknown-unknown -mcpu=generic \ // RUN: | FileCheck %s -check-prefix=GENERIC-INCLUDE // +// GENERIC-INCLUDE-DAG: #define __wasm_bulk_memory__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_multivalue__ 1{{$}} // GENERIC-INCLUDE-DAG: #define 
__wasm_mutable_globals__ 1{{$}} +// GENERIC-INCLUDE-DAG: #define __wasm_nontrapping_fptoint__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_reference_types__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_sign_ext__ 1{{$}} // @@ -176,12 +178,10 @@ // RUN: | FileCheck %s -check-prefix=GENERIC // // GENERIC-NOT: #define __wasm_atomics__ 1{{$}} -// GENERIC-NOT: #define __wasm_bulk_memory__ 1{{$}} // GENERIC-NOT: #define __wasm_exception_handling__ 1{{$}} // GENERIC-NOT: #define __wasm_extended_const__ 1{{$}} // GENERIC-NOT: #define __wasm__fp16__ 1{{$}} // GENERIC-NOT: #define __wasm_multimemory__ 1{{$}} -// GENERIC-NOT: #define __wasm_nontrapping_fptoint__ 1{{$}} // GENERIC-NOT: #define __wasm_relaxed_simd__ 1{{$}} // GENERIC-NOT: #define __wasm_simd128__ 1{{$}} // GENERIC-NOT: #define __wasm_tail_call__ 1{{$}} diff --git a/lld/test/wasm/custom-section-name.ll b/lld/test/wasm/custom-section-name.ll index b860ef5a83e8364..8799fbf36056d1d 100644 --- a/lld/test/wasm/custom-section-name.ll +++ b/lld/test/wasm/custom-section-name.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o %t.o ; RUN: wasm-ld -no-gc-sections --no-entry -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=CHECK,NO-BSS ; RUN: wasm-ld -no-gc-sections --no-entry --import-memory -o %t.bss.wasm %t.o diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index 670ac3c1f373faf..41868a0b2b50b6f 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -1,4 +1,4 @@ -; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics +; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics,-bulk-memory ; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.bulk-mem.o -mattr=+bulk-memory ; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o %t.bulk-mem64.o -mattr=+bulk-memory ; RUN: llc 
--mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.bulk-mem.o -mattr=+atomics,+bulk-memory diff --git a/lld/test/wasm/lto/Inputs/libcall-archive.ll b/lld/test/wasm/lto/Inputs/libcall-archive.ll index 9d05efdeae0806e..7d8c34196dfe49a 100644 --- a/lld/test/wasm/lto/Inputs/libcall-archive.ll +++ b/lld/test/wasm/lto/Inputs/libcall-archive.ll @@ -1,6 +1,8 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" target triple = "wasm32-unknown-unknown" -define void @memcpy() { +define void @memcpy() #0 { ret void } + +attributes #0 = { "target-features"="-bulk-memory" } diff --git a/lld/test/wasm/lto/libcall-archive.ll b/lld/test/wasm/lto/libcall-archive.ll index 2f785b98976ec88..5c46d2f7ed78381 100644 --- a/lld/test/wasm/lto/libcall-archive.ll +++ b/lld/test/wasm/lto/libcall-archive.ll @@ -8,7 +8,7 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" target triple = "wasm32-unknown-unknown" -define void @_start(ptr %a, ptr %b) { +define void @_start(ptr %a, ptr %b) #0 { entry: call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 1024, i1 false) ret void @@ -16,6 +16,8 @@ entry: declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) +attributes #0 = { "target-features"="-bulk-memory" } + ; CHECK: - Type: CUSTOM ; CHECK-NEXT: Name: name ; CHECK-NEXT: FunctionNames: diff --git a/lld/test/wasm/lto/stub-library-libcall.s b/lld/test/wasm/lto/stub-library-libcall.s index ce88a32dd99dc7b..d65983c0cf5bf52 100644 --- a/lld/test/wasm/lto/stub-library-libcall.s +++ b/lld/test/wasm/lto/stub-library-libcall.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t_main.o %t/main.s # RUN: llvm-as %S/Inputs/foo.ll -o %t_foo.o # RUN: llvm-as %S/Inputs/libcall.ll -o %t_libcall.o -# RUN: wasm-ld %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm +# RUN: wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm # RUN: 
obj2yaml %t.wasm | FileCheck %s # The function `func_with_libcall` will generate an undefined reference to @@ -12,7 +12,7 @@ # If %t_foo.o is not included in the link we get an undefined symbol reported # to the dependency of memcpy on the foo export: -# RUN: not wasm-ld %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s +# RUN: not wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s # MISSING: stub.so: undefined symbol: foo. Required by memcpy #--- main.s diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index be51b0af56ddbf7..e3d93f0dfd0ec55 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -180,6 +180,15 @@ Changes to the RISC-V Backend Changes to the WebAssembly Backend ---------------------------------- +The default target CPU, "generic", now enables the `-mbulk-memory` and +`-mnontrapping-fptoint` flags, which correspond to the [Bulk Memory Operations] +and [Non-trapping float-to-int Conversions] language features, both of which are +[widely implemented in engines]. + +[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md +[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md +[widely implemented in engines]: https://webassembly.org/features/ + Changes to the Windows Target ----------------------------- diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 37d99690c25b1fa..88628f2a7935453 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -114,7 +114,8 @@ def : ProcessorModel<"mvp", NoSchedModel, []>; // consideration given to available support in relevant engines and tools, and // the importance of the features. 
def : ProcessorModel<"generic", NoSchedModel, - [FeatureMultivalue, FeatureMutableGlobals, + [FeatureBulkMemory, FeatureMultivalue, + FeatureMutableGlobals, FeatureNontrappingFPToInt, FeatureReferenceTypes, FeatureSignExt]>; // Latest and greatest experimental version of WebAssembly. Bugs included! diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index a3cc9bae470859b..7c3e8d18ad276bb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -111,6 +111,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) { Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage, F->getName() + "_bitcast", M); + Wrapper->setAttributes(F->getAttributes()); BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); const DataLayout &DL = BB->getDataLayout(); @@ -201,6 +202,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) { Wrapper->eraseFromParent(); Wrapper = Function::Create(Ty, Function::PrivateLinkage, F->getName() + "_bitcast_invalid", M); + Wrapper->setAttributes(F->getAttributes()); BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); new UnreachableInst(M->getContext(), BB); Wrapper->setName(F->getName() + "_bitcast_invalid"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 3fe6ccf1c608e1e..83cd57d0bbdd557 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -233,13 +233,30 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { private: FeatureBitset coalesceFeatures(const Module &M) { - FeatureBitset Features = - WasmTM - ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), - std::string(WasmTM->getTargetFeatureString())) - 
->getFeatureBits(); - for (auto &F : M) + // Union the features of all defined functions. Start with an empty set, so + // that if a feature is disabled in every function, we'll compute it as + // disabled. If any function lacks a target-features attribute, it'll + // default to the target CPU from the `TargetMachine`. + FeatureBitset Features; + bool AnyDefinedFuncs = false; + for (auto &F : M) { + if (F.isDeclaration()) + continue; + Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); + AnyDefinedFuncs = true; + } + + // If we have no defined functions, use the target CPU from the + // `TargetMachine`. + if (!AnyDefinedFuncs) { + Features = + WasmTM + ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), + std::string(WasmTM->getTargetFeatureString())) + ->getFeatureBits(); + } + return Features; } diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll index cef92f459e4aa37..24a08267db6fbf7 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll @@ -1,9 +1,9 @@ ; REQUIRES: asserts -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT -; RUN: llc < %s 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory +; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT +; RUN: llc < %s 
-disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 77d1564409f78cc..ba10dd94a9838dc 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -13,7 +13,10 @@ target triple = "wasm32-unknown-unknown" ; generic: +multivalue, +mutable-globals, +reference-types, +sign-ext ; GENERIC-LABEL: .custom_section.target_features,"",@ -; GENERIC-NEXT: .int8 4 +; GENERIC-NEXT: .int8 6 +; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 11 +; GENERIC-NEXT: .ascii "bulk-memory" ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 10 ; GENERIC-NEXT: .ascii "multivalue" @@ -21,6 +24,9 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .int8 15 ; GENERIC-NEXT: .ascii "mutable-globals" ; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 19 +; GENERIC-NEXT: .ascii "nontrapping-fptoint" +; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 15 ; GENERIC-NEXT: .ascii "reference-types" ; GENERIC-NEXT: .int8 43 diff --git a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll index 320b65356ba9f37..b321c0c82ad4d31 100644 --- a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll +++ b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - | FileCheck %s -; RUN: llc %s -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory -o - | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s ; ModuleID = 'test.c' 
source_filename = "test.c" diff --git a/llvm/test/MC/WebAssembly/libcall.ll b/llvm/test/MC/WebAssembly/libcall.ll index 8b81f150da892aa..ffd32abe2345bc7 100644 --- a/llvm/test/MC/WebAssembly/libcall.ll +++ b/llvm/test/MC/WebAssembly/libcall.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown"