Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions clang/lib/Headers/__clang_hip_libdevice_declares.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,15 +285,6 @@ __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));

// We need to match C99's bool and get an i1 in the IR.
#ifdef __cplusplus
typedef bool __ockl_bool;
#else
typedef _Bool __ockl_bool;
#endif

__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b,
float c, __ockl_bool s);
__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
__device__ __2f16 __ocml_cos_2f16(__2f16);
Expand Down
51 changes: 0 additions & 51 deletions clang/test/Headers/__clang_hip_libdevice_declares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,57 +84,6 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
return __ocml_acos_f32(src);
}

// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi
// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
// CHECK-NEXT: [[S_INT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_INT_ADDR]] to ptr
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y]] to ptr
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]]
// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]]
// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]]
// CHECK-NEXT: ret float [[ADD]]
//
TEST_FUNC_ATTRS float test_ockl_fdot2(__2f16 a, __2f16 b, float c, bool s, int s_int) {
float x = __ockl_fdot2(a, b, c, s);
float y = __ockl_fdot2(a, b, c, s_int);
return x + y;
}


#ifdef _OPENMP
#pragma omp end declare target
#endif
Expand Down
43 changes: 6 additions & 37 deletions clang/test/Headers/openmp-device-functions-bool.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-C
// RUN: %clang_cc1 -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CPP

// Test that we did not include <stdbool.h> in C, and OCKL functions using bool
// produce an i1
// Test that we did not include <stdbool.h> in C

#ifdef __cplusplus
typedef bool ockl_bool;
Expand All @@ -15,47 +14,16 @@ typedef _Bool ockl_bool;

#pragma omp begin declare target

// CHECK-LABEL: define hidden float @test_fdot2
// CHECK-SAME: (<2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]], float noundef [[C:%.*]], i1 noundef zeroext [[S:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> noundef [[TMP0]], <2 x half> noundef [[TMP1]], float noundef [[TMP2]], i1 noundef zeroext [[TOBOOL]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret float [[CALL]]
//
EXTERN_C float test_fdot2(__2f16 a, __2f16 b, float c, ockl_bool s) {
return __ockl_fdot2(a, b, c, s);
}


#ifndef __cplusplus

enum my_bool {
false,
true
};

// CHECK-C-LABEL: define hidden i32 @use_my_bool
// CHECK-C-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-LABEL: define hidden i32 @use_my_bool(
// CHECK-C-SAME: i32 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: [[ENTRY:.*:]]
// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-C-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-C-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5)
Expand Down Expand Up @@ -86,4 +54,5 @@ enum my_bool use_my_bool(enum my_bool b) {

#pragma omp end declare target
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}
// CHECK-CPP: {{.*}}