diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index fad9c6ca7ffc5..05654297be08a 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -285,15 +285,6 @@ __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); -// We need to match C99's bool and get an i1 in the IR. -#ifdef __cplusplus -typedef bool __ockl_bool; -#else -typedef _Bool __ockl_bool; -#endif - -__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, - float c, __ockl_bool s); __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); diff --git a/clang/test/Headers/__clang_hip_libdevice_declares.cpp b/clang/test/Headers/__clang_hip_libdevice_declares.cpp index 2b96f9499eb9a..17cd40069d5fe 100644 --- a/clang/test/Headers/__clang_hip_libdevice_declares.cpp +++ b/clang/test/Headers/__clang_hip_libdevice_declares.cpp @@ -84,57 +84,6 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) { return __ocml_acos_f32(src); } -// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi -// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) -// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[X:%.*]] = alloca float, 
align 4, addrspace(5) -// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr -// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr -// CHECK-NEXT: [[S_INT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_INT_ADDR]] to ptr -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y]] to ptr -// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1 -// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1 -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 -// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]] -// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4 -// 
CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]] -// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]] -// CHECK-NEXT: ret float [[ADD]] -// -TEST_FUNC_ATTRS float test_ockl_fdot2(__2f16 a, __2f16 b, float c, bool s, int s_int) { - float x = __ockl_fdot2(a, b, c, s); - float y = __ockl_fdot2(a, b, c, s_int); - return x + y; -} - - #ifdef _OPENMP #pragma omp end declare target #endif diff --git a/clang/test/Headers/openmp-device-functions-bool.c b/clang/test/Headers/openmp-device-functions-bool.c index bb01096027849..51c97e1d86cdb 100644 --- a/clang/test/Headers/openmp-device-functions-bool.c +++ b/clang/test/Headers/openmp-device-functions-bool.c @@ -1,9 +1,8 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-C // RUN: %clang_cc1 -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem 
%S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CPP -// Test that we did not include <stdbool.h> in C, and OCKL functions using bool -// produce an i1 +// Test that we did not include <stdbool.h> in C #ifdef __cplusplus typedef bool ockl_bool; @@ -15,37 +14,6 @@ typedef _Bool ockl_bool; #pragma omp begin declare target -// CHECK-LABEL: define hidden float @test_fdot2 -// CHECK-SAME: (<2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]], float noundef [[C:%.*]], i1 noundef zeroext [[S:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) -// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr -// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr -// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4 
-// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1 -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 -// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> noundef [[TMP0]], <2 x half> noundef [[TMP1]], float noundef [[TMP2]], i1 noundef zeroext [[TOBOOL]]) #[[ATTR2:[0-9]+]] -// CHECK-NEXT: ret float [[CALL]] -// -EXTERN_C float test_fdot2(__2f16 a, __2f16 b, float c, ockl_bool s) { - return __ockl_fdot2(a, b, c, s); -} - - #ifndef __cplusplus enum my_bool { @@ -53,9 +21,9 @@ enum my_bool { true }; -// CHECK-C-LABEL: define hidden i32 @use_my_bool -// CHECK-C-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-C-NEXT: entry: +// CHECK-C-LABEL: define hidden i32 @use_my_bool( +// CHECK-C-SAME: i32 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-C-NEXT: [[ENTRY:.*:]] // CHECK-C-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-C-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-C-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5) @@ -86,4 +54,5 @@ enum my_bool use_my_bool(enum my_bool b) { #pragma omp end declare target //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} // CHECK-CPP: {{.*}}