Skip to content

Commit e439517

Browse files
committed
clang/HIP: Remove __ockl_fdot2 declaration
The builtin headers should not be in the business of exporting ockl functions; they should declare only the minimum set that the builtin headers themselves actively use.
1 parent 40ae983 commit e439517

File tree

3 files changed

+6
-97
lines changed

3 files changed

+6
-97
lines changed

clang/lib/Headers/__clang_hip_libdevice_declares.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -285,15 +285,6 @@ __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);
285285
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
286286
typedef short __2i16 __attribute__((ext_vector_type(2)));
287287

288-
// We need to match C99's bool and get an i1 in the IR.
289-
#ifdef __cplusplus
290-
typedef bool __ockl_bool;
291-
#else
292-
typedef _Bool __ockl_bool;
293-
#endif
294-
295-
__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b,
296-
float c, __ockl_bool s);
297288
__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
298289
__device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
299290
__device__ __2f16 __ocml_cos_2f16(__2f16);

clang/test/Headers/__clang_hip_libdevice_declares.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -84,57 +84,6 @@ TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) {
8484
return __ocml_acos_f32(src);
8585
}
8686

87-
// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi
88-
// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] {
89-
// CHECK-NEXT: entry:
90-
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
91-
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
92-
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
93-
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
94-
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
95-
// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
96-
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
97-
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5)
98-
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
99-
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
100-
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
101-
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
102-
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
103-
// CHECK-NEXT: [[S_INT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_INT_ADDR]] to ptr
104-
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
105-
// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y]] to ptr
106-
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
107-
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
108-
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
109-
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
110-
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
111-
// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4
112-
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
113-
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
114-
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
115-
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
116-
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
117-
// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]]
118-
// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4
119-
// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
120-
// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
121-
// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
122-
// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4
123-
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0
124-
// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]]
125-
// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4
126-
// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4
127-
// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4
128-
// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]]
129-
// CHECK-NEXT: ret float [[ADD]]
130-
//
131-
TEST_FUNC_ATTRS float test_ockl_fdot2(__2f16 a, __2f16 b, float c, bool s, int s_int) {
132-
float x = __ockl_fdot2(a, b, c, s);
133-
float y = __ockl_fdot2(a, b, c, s_int);
134-
return x + y;
135-
}
136-
137-
13887
#ifdef _OPENMP
13988
#pragma omp end declare target
14089
#endif
Lines changed: 6 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
22
// RUN: %clang_cc1 -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-C
33
// RUN: %clang_cc1 -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CPP
44

5-
// Test that we did not include <stdbool.h> in C, and OCKL functions using bool
6-
// produce an i1
5+
// Test that we did not include <stdbool.h> in C
76

87
#ifdef __cplusplus
98
typedef bool ockl_bool;
@@ -15,47 +14,16 @@ typedef _Bool ockl_bool;
1514

1615
#pragma omp begin declare target
1716

18-
// CHECK-LABEL: define hidden float @test_fdot2
19-
// CHECK-SAME: (<2 x half> noundef [[A:%.*]], <2 x half> noundef [[B:%.*]], float noundef [[C:%.*]], i1 noundef zeroext [[S:%.*]]) #[[ATTR0:[0-9]+]] {
20-
// CHECK-NEXT: entry:
21-
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
22-
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
23-
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
24-
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5)
25-
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
26-
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
27-
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
28-
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
29-
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
30-
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
31-
// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4
32-
// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4
33-
// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4
34-
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8
35-
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1
36-
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
37-
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4
38-
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4
39-
// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1
40-
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
41-
// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> noundef [[TMP0]], <2 x half> noundef [[TMP1]], float noundef [[TMP2]], i1 noundef zeroext [[TOBOOL]]) #[[ATTR2:[0-9]+]]
42-
// CHECK-NEXT: ret float [[CALL]]
43-
//
44-
EXTERN_C float test_fdot2(__2f16 a, __2f16 b, float c, ockl_bool s) {
45-
return __ockl_fdot2(a, b, c, s);
46-
}
47-
48-
4917
#ifndef __cplusplus
5018

5119
enum my_bool {
5220
false,
5321
true
5422
};
5523

56-
// CHECK-C-LABEL: define hidden i32 @use_my_bool
57-
// CHECK-C-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] {
58-
// CHECK-C-NEXT: entry:
24+
// CHECK-C-LABEL: define hidden i32 @use_my_bool(
25+
// CHECK-C-SAME: i32 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
26+
// CHECK-C-NEXT: [[ENTRY:.*:]]
5927
// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
6028
// CHECK-C-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
6129
// CHECK-C-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5)
@@ -86,4 +54,5 @@ enum my_bool use_my_bool(enum my_bool b) {
8654

8755
#pragma omp end declare target
8856
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
57+
// CHECK: {{.*}}
8958
// CHECK-CPP: {{.*}}

0 commit comments

Comments
 (0)