Skip to content

Commit de3a9ea

Browse files
authored
[NFC][libclc] Simplify clc_dot and dot implementation (#142922)
llvm-diff shows no change to amdgcn--amdhsa.bc
1 parent 16b0d2f commit de3a9ea

File tree

3 files changed: 31 additions and 112 deletions

libclc/clc/lib/generic/geometric/clc_dot.cl

Lines changed: 3 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -7,59 +7,7 @@
7 7
//===----------------------------------------------------------------------===//
8 8

9 9
#include <clc/internal/clc.h>
10+
#include <clc/math/clc_fma.h>
10 11

11-
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; }
12-
13-
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
14-
return p0.x * p1.x + p0.y * p1.y;
15-
}
16-
17-
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
18-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
19-
}
20-
21-
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
22-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
23-
}
24-
25-
#ifdef cl_khr_fp64
26-
27-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
28-
29-
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
30-
return p0 * p1;
31-
}
32-
33-
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
34-
return p0.x * p1.x + p0.y * p1.y;
35-
}
36-
37-
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
38-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
39-
}
40-
41-
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
42-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
43-
}
44-
45-
#endif
46-
47-
#ifdef cl_khr_fp16
48-
49-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
50-
51-
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; }
52-
53-
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
54-
return p0.x * p1.x + p0.y * p1.y;
55-
}
56-
57-
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
58-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
59-
}
60-
61-
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
62-
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
63-
}
64-
65-
#endif
12+
#define __CLC_BODY <clc_dot.inc>
13+
#include <clc/math/gentype.inc>
libclc/clc/lib/generic/geometric/clc_dot.inc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
10+
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
11+
12+
_CLC_OVERLOAD _CLC_DEF __CLC_SCALAR_GENTYPE __clc_dot(__CLC_GENTYPE x,
13+
__CLC_GENTYPE y) {
14+
#if __CLC_VECSIZE_OR_1 == 1
15+
return x * y;
16+
#elif __CLC_VECSIZE_OR_1 == 2
17+
return x.s0 * y.s0 + x.s1 * y.s1;
18+
#elif __CLC_VECSIZE_OR_1 == 3
19+
return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2;
20+
#else
21+
return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2 + x.s3 * y.s3;
22+
#endif
23+
}
24+
25+
#endif

libclc/opencl/lib/generic/geometric/dot.cl

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -9,60 +9,6 @@
9 9
#include <clc/geometric/clc_dot.h>
10 10
#include <clc/opencl/clc.h>
11 11

12-
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
13-
return __clc_dot(p0, p1);
14-
}
15-
16-
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
17-
return __clc_dot(p0, p1);
18-
}
19-
20-
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
21-
return __clc_dot(p0, p1);
22-
}
23-
24-
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
25-
return __clc_dot(p0, p1);
26-
}
27-
28-
#ifdef cl_khr_fp64
29-
30-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
31-
32-
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
33-
return __clc_dot(p0, p1);
34-
}
35-
36-
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
37-
return __clc_dot(p0, p1);
38-
}
39-
40-
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
41-
return __clc_dot(p0, p1);
42-
}
43-
44-
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
45-
return __clc_dot(p0, p1);
46-
}
47-
48-
#endif
49-
50-
#ifdef cl_khr_fp16
51-
52-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
53-
54-
_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
55-
56-
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
57-
return __clc_dot(p0, p1);
58-
}
59-
60-
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
61-
return __clc_dot(p0, p1);
62-
}
63-
64-
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
65-
return __clc_dot(p0, p1);
66-
}
67-
68-
#endif
12+
#define FUNCTION dot
13+
#define __CLC_BODY <clc/geometric/binary_def.inc>
14+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)