Skip to content

Commit db98e29

Browse files
authored
[libclc] Move log1p/asinh/acosh/atanh to the CLC library (#132956)
These four functions are all related in that they share tables and helper functions. Furthermore, the acosh and atanh builtins call log1p. As with other work in this area, these builtins are now vectorized. To enable this, there are new table accessor functions which return a vector of table values using a vector of indices. These are internally scalarized, in the absence of gather operations. Some tables which were tables of multiple entries (e.g., double2) are split into two separate "low" and "high" tables. This might affect the performance of memory operations, but this is hopefully mitigated by better codegen overall.
1 parent 1715386 commit db98e29

31 files changed

+1237
-1069
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ACOSH_H__
10+
#define __CLC_MATH_CLC_ACOSH_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_acosh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_ACOSH_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ASINH_H__
10+
#define __CLC_MATH_CLC_ASINH_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_asinh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_ASINH_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_ATANH_H__
10+
#define __CLC_MATH_CLC_ATANH_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_atanh
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_ATANH_H__
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_EP_LOG_H__
10+
#define __CLC_MATH_CLC_EP_LOG_H__
11+
12+
#define __CLC_BODY <clc/math/clc_ep_log.inc>
13+
#include <clc/math/gentype.inc>
14+
15+
#endif // __CLC_MATH_CLC_EP_LOG_H__
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 64
10+
11+
// Declaration of the __clc_ep_log helper for the current gentype. It is only
// declared when __CLC_FPSIZE == 64 (see the enclosing #if).
//
// x    - input value.
// xexp - output, written through a private pointer to an integer gentype.
// r1   - output, written through a private pointer.
// r2   - output, written through a private pointer.
//
// NOTE(review): the definition is not visible here. Judging by the acosh
// caller, xexp presumably receives a binary exponent and (r1, r2) a
// leading/trailing pair for the remaining part of the logarithm - confirm
// against the implementation.
_CLC_DECL _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x,
12+
private __CLC_INTN *xexp,
13+
private __CLC_GENTYPE *r1,
14+
private __CLC_GENTYPE *r2);
15+
16+
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_LOG1P_H__
10+
#define __CLC_MATH_CLC_LOG1P_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_log1p
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_LOG1P_H__

libclc/clc/include/clc/math/tables.h

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,45 @@
2020
// Defines the scalar accessor TABLE_MANGLE(NAME), which takes a size_t index
// and returns TABLE[idx] as TYPE. No bounds checking is performed.
#define TABLE_FUNCTION(TYPE, TABLE, NAME) \
2121
TYPE TABLE_MANGLE(NAME)(size_t idx) { return TABLE[idx]; }
2222

23+
// Defines six overloads of the accessor TABLE_MANGLE(NAME) for TABLE:
//   - a scalar overload taking an int index and returning TYPE;
//   - vector overloads taking int2/int3/int4/int8/int16 indices and returning
//     the matching TYPE2/TYPE3/TYPE4/TYPE8/TYPE16 vector.
// Each vector overload is scalarized: it reads TABLE once per index component
// (idx.s0 ... idx.sF) and packs the values into the result vector.
// No bounds checking is performed on any index.
#define CLC_TABLE_FUNCTION(TYPE, TABLE, NAME) \
24+
_CLC_DEF _CLC_OVERLOAD TYPE TABLE_MANGLE(NAME)(int idx) { \
25+
return TABLE[idx]; \
26+
} \
27+
_CLC_DEF _CLC_OVERLOAD TYPE##2 TABLE_MANGLE(NAME)(int##2 idx) { \
28+
return (TYPE##2){TABLE[idx.s0], TABLE[idx.s1]}; \
29+
} \
30+
_CLC_DEF _CLC_OVERLOAD TYPE##3 TABLE_MANGLE(NAME)(int##3 idx) { \
31+
return (TYPE##3){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2]}; \
32+
} \
33+
_CLC_DEF _CLC_OVERLOAD TYPE##4 TABLE_MANGLE(NAME)(int##4 idx) { \
34+
return (TYPE##4){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], \
35+
TABLE[idx.s3]}; \
36+
} \
37+
_CLC_DEF _CLC_OVERLOAD TYPE##8 TABLE_MANGLE(NAME)(int##8 idx) { \
38+
return (TYPE##8){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], \
39+
TABLE[idx.s3], TABLE[idx.s4], TABLE[idx.s5], \
40+
TABLE[idx.s6], TABLE[idx.s7]}; \
41+
} \
42+
_CLC_DEF _CLC_OVERLOAD TYPE##16 TABLE_MANGLE(NAME)(int##16 idx) { \
43+
return (TYPE##16){ \
44+
TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], TABLE[idx.s3], \
45+
TABLE[idx.s4], TABLE[idx.s5], TABLE[idx.s6], TABLE[idx.s7], \
46+
TABLE[idx.s8], TABLE[idx.s9], TABLE[idx.sA], TABLE[idx.sB], \
47+
TABLE[idx.sC], TABLE[idx.sD], TABLE[idx.sE], TABLE[idx.sF]}; \
48+
}
49+
2350
// Declares the scalar accessor TABLE_MANGLE(NAME): size_t index in, TYPE out.
#define TABLE_FUNCTION_DECL(TYPE, NAME) TYPE TABLE_MANGLE(NAME)(size_t idx);
2451

52+
// Declares six overloads of the accessor TABLE_MANGLE(NAME): one taking a
// scalar int index and returning TYPE, and one for each of the vector widths
// 2, 3, 4, 8 and 16 taking an intN index and returning TYPEN.
#define CLC_TABLE_FUNCTION_DECL(TYPE, NAME) \
53+
_CLC_DECL _CLC_OVERLOAD TYPE TABLE_MANGLE(NAME)(int idx); \
54+
_CLC_DECL _CLC_OVERLOAD TYPE##2 TABLE_MANGLE(NAME)(int##2 idx); \
55+
_CLC_DECL _CLC_OVERLOAD TYPE##3 TABLE_MANGLE(NAME)(int##3 idx); \
56+
_CLC_DECL _CLC_OVERLOAD TYPE##4 TABLE_MANGLE(NAME)(int##4 idx); \
57+
_CLC_DECL _CLC_OVERLOAD TYPE##8 TABLE_MANGLE(NAME)(int##8 idx); \
58+
_CLC_DECL _CLC_OVERLOAD TYPE##16 TABLE_MANGLE(NAME)(int##16 idx);
59+
2560
// Expands to a call of the accessor TABLE_MANGLE(NAME) with argument IDX.
#define USE_TABLE(NAME, IDX) TABLE_MANGLE(NAME)(IDX)
2661

27-
TABLE_FUNCTION_DECL(float2, loge_tbl);
28-
TABLE_FUNCTION_DECL(float, log_inv_tbl);
2962
TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep);
3063
TABLE_FUNCTION_DECL(float2, log2_tbl);
3164
TABLE_FUNCTION_DECL(float2, log10_tbl);
@@ -35,11 +68,17 @@ TABLE_FUNCTION_DECL(float2, cbrt_tbl);
3568
TABLE_FUNCTION_DECL(float, exp_tbl);
3669
TABLE_FUNCTION_DECL(float2, exp_tbl_ep);
3770

71+
CLC_TABLE_FUNCTION_DECL(float, loge_tbl_lo);
72+
CLC_TABLE_FUNCTION_DECL(float, loge_tbl_hi);
73+
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl);
74+
3875
#ifdef cl_khr_fp64
3976

4077
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
4178

4279
TABLE_FUNCTION_DECL(double2, ln_tbl);
80+
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo);
81+
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi);
4382
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
4483
TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
4584
TABLE_FUNCTION_DECL(double2, sinh_tbl);

libclc/clc/lib/generic/SOURCES

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@ integer/clc_rotate.cl
1818
integer/clc_sub_sat.cl
1919
integer/clc_upsample.cl
2020
math/clc_acos.cl
21+
math/clc_acosh.cl
2122
math/clc_acospi.cl
2223
math/clc_asin.cl
24+
math/clc_asinh.cl
2325
math/clc_asinpi.cl
2426
math/clc_atan.cl
27+
math/clc_atanh.cl
2528
math/clc_atanpi.cl
2629
math/clc_ceil.cl
2730
math/clc_copysign.cl
31+
math/clc_ep_log.cl
2832
math/clc_fabs.cl
2933
math/clc_fma.cl
3034
math/clc_floor.cl
@@ -33,6 +37,7 @@ math/clc_hypot.cl
3337
math/clc_ldexp.cl
3438
math/clc_log.cl
3539
math/clc_log10.cl
40+
math/clc_log1p.cl
3641
math/clc_log2.cl
3742
math/clc_mad.cl
3843
math/clc_modf.cl
@@ -44,6 +49,7 @@ math/clc_rsqrt.cl
4449
math/clc_sincos_helpers.cl
4550
math/clc_sqrt.cl
4651
math/clc_sw_fma.cl
52+
math/clc_tables.cl
4753
math/clc_trunc.cl
4854
relational/clc_all.cl
4955
relational/clc_any.cl
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_ep_log.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_log1p.h>
16+
#include <clc/math/clc_mad.h>
17+
#include <clc/math/clc_sqrt.h>
18+
#include <clc/math/math.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/relational/clc_select.h>
22+
23+
#define __CLC_BODY <clc_acosh.inc>
24+
#include <clc/math/gentype.inc>
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// Inverse hyperbolic cosine for 32-bit float gentypes (scalar or vector).
//
// The result is built branch-free from a single __clc_log1p call whose
// argument depends on the size of x:
//   - "high" inputs:          log1p(x - 1) + ln(2)
//   - "med" but not "high":   log1p(x + sqrt(x * x - 1) - 1)
//   - otherwise, w = x - 1:   log1p(w + sqrt(w * w + 2 * w))
// NaN and infinite inputs are passed through unchanged, after which any
// input below 1.0f (including -infinity) yields NaN.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
12+
// Raw bit pattern of x, used for the magnitude classification below.
__CLC_UINTN ux = __CLC_AS_UINTN(x);
13+
14+
// Arguments greater than 1/sqrt(epsilon) in magnitude are approximated by
15+
// acosh(x) = ln(2) + ln(x)
16+
// For 2.0 <= x <= 1/sqrt(epsilon) the approximation is:
17+
// acosh(x) = ln(x + sqrt(x * x - 1))
18+
// 0x46000000 is the bit pattern of 8192.0f and 0x40000000 that of 2.0f.
// The comparison is unsigned, so negative inputs (sign bit set) also set
// both flags; they are turned into NaN by the x < 1.0f check at the end.
__CLC_INTN high = ux > 0x46000000U;
19+
__CLC_INTN med = ux > 0x40000000U;
20+
21+
__CLC_GENTYPE w = x - 1.0f;
22+
__CLC_GENTYPE s = w * w + 2.0f * w;
23+
__CLC_GENTYPE t = x * x - 1.0f;
24+
__CLC_GENTYPE r = __clc_sqrt(med ? t : s) + (med ? x : w);
25+
// v is the log1p argument; the "- 1.0f" turns log1p(v) into ln(v + 1).
__CLC_GENTYPE v = (high ? x : r) - (med ? 1.0f : 0.0f);
26+
// 0x1.62e430p-1f is ln(2) rounded to float.
__CLC_GENTYPE z = __clc_log1p(v) + (high ? 0x1.62e430p-1f : 0.0f);
27+
28+
// NaN and infinity propagate as x itself.
z = __clc_select(z, x, __clc_isnan(x) || __clc_isinf(x));
29+
// acosh is undefined below 1.
z = x < 1.0f ? __CLC_GENTYPE_NAN : z;
30+
31+
return z;
32+
}
33+
34+
#elif __CLC_FPSIZE == 64
35+
36+
// Inverse hyperbolic cosine for 64-bit double gentypes (scalar or vector).
//
// Two candidate results are always computed and one is selected at the end:
//   ret1 - used for x >= 128: a logarithm of r, where r is
//          x + sqrt(x * x - 1) (or x itself above recrteps), obtained via
//          __clc_ep_log and recombined with a split ln(2) constant.
//   ret2 - used for x < 128: log1p(t + sqrt(2 * t + t * t)) with t = x - 1,
//          where the log1p argument is accumulated in two-part (head/tail)
//          arithmetic.
// Special cases applied afterwards, in order: infinite or NaN x returns x,
// x == 1 returns 0, and x < 1 returns NaN.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
37+
// 1/sqrt(eps) = 9.49062656242515593767e+07
38+
const __CLC_GENTYPE recrteps = 0x1.6a09e667f3bcdp+26;
39+
// log2_lead and log2_tail sum to an extra-precise version of log(2)
40+
const __CLC_GENTYPE log2_lead = 0x1.62e42ep-1;
41+
const __CLC_GENTYPE log2_tail = 0x1.efa39ef35793cp-25;
42+
43+
// Handle x >= 128 here
44+
// Above recrteps, r is x itself; the exponent used below is then bumped by
// one, which adds one extra ln(2) to the result (ln(2x) = ln(2) + ln(x)).
__CLC_LONGN xlarge = x > recrteps;
45+
__CLC_GENTYPE r = x + __clc_sqrt(__clc_fma(x, x, -1.0));
46+
r = xlarge ? x : r;
47+
48+
// NOTE(review): __clc_ep_log is defined elsewhere; presumably it yields the
// binary exponent of r in xexp and a head/tail pair (r1, r2) for the rest of
// the logarithm - confirm against its implementation.
__CLC_INTN xexp;
49+
__CLC_GENTYPE r1, r2;
50+
__clc_ep_log(r, &xexp, &r1, &r2);
51+
52+
__CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE(
53+
__CLC_CONVERT_LONGN(xexp) + (xlarge ? (__CLC_LONGN)1 : (__CLC_LONGN)0));
54+
// Add dxexp * ln(2), keeping the lead and tail contributions separate.
r1 = __clc_fma(dxexp, log2_lead, r1);
55+
r2 = __clc_fma(dxexp, log2_tail, r2);
56+
57+
__CLC_GENTYPE ret1 = r1 + r2;
58+
59+
// Handle 1 < x < 128 here
60+
// We compute the value
61+
// t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0))
62+
// using simulated quad precision.
63+
__CLC_GENTYPE t = x - 1.0;
64+
__CLC_GENTYPE u1 = t * 2.0;
65+
66+
// (t,0) * (t,0) -> (v1, v2)
67+
// v2 is the rounding error of t * t, recovered with an fma.
__CLC_GENTYPE v1 = t * t;
68+
__CLC_GENTYPE v2 = __clc_fma(t, t, -v1);
69+
70+
// (u1,0) + (v1,v2) -> (w1,w2)
71+
r = u1 + v1;
72+
__CLC_GENTYPE s = (((u1 - r) + v1) + v2);
73+
__CLC_GENTYPE w1 = r + s;
74+
__CLC_GENTYPE w2 = (r - w1) + s;
75+
76+
// sqrt(w1,w2) -> (u1,u2)
77+
// p1 is the plain square root of the head; p2 is a correction term computed
// from the residual (w1 + w2) - p1 * p1 divided by 2 * p1.
__CLC_GENTYPE p1 = __clc_sqrt(w1);
78+
__CLC_GENTYPE a1 = p1 * p1;
79+
__CLC_GENTYPE a2 = __clc_fma(p1, p1, -a1);
80+
__CLC_GENTYPE temp = (((w1 - a1) - a2) + w2);
81+
__CLC_GENTYPE p2 = MATH_DIVIDE(temp * 0.5, p1);
82+
u1 = p1 + p2;
83+
__CLC_GENTYPE u2 = (p1 - u1) + p2;
84+
85+
// (u1,u2) + (t,0) -> (r1,r2)
86+
r = u1 + t;
87+
s = ((u1 - r) + t) + u2;
88+
// r1 = r + s;
89+
// r2 = (r - r1) + s;
90+
// t = r1 + r2;
91+
t = r + s;
92+
93+
// For arguments 1.13 <= x <= 1.5 the log1p function is good enough
94+
// NOTE(review): the range in the comment above does not match the selection
// below, which uses this log1p result for every x < 128 - confirm intent.
__CLC_GENTYPE ret2 = __clc_log1p(t);
95+
96+
// Pick the large-argument or small-argument result.
__CLC_GENTYPE ret = x >= 128.0 ? ret1 : ret2;
97+
98+
// Special cases: pass through inf/NaN, exact zero at 1, NaN below 1.
ret = (__clc_isinf(x) || __clc_isnan(x)) ? x : ret;
99+
ret = x == 1.0 ? 0.0 : ret;
100+
ret = x < 1.0 ? __CLC_GENTYPE_NAN : ret;
101+
102+
return ret;
103+
}
104+
105+
#elif __CLC_FPSIZE == 16
106+
107+
// Inverse hyperbolic cosine for 16-bit half gentypes: converts x to the
// matching float type, calls the float overload of __clc_acosh, and converts
// the result back to the half gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) {
108+
return __CLC_CONVERT_GENTYPE(__clc_acosh(__CLC_CONVERT_FLOATN(x)));
109+
}
110+
111+
#endif
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/float/definitions.h>
11+
#include <clc/internal/clc.h>
12+
#include <clc/math/clc_ep_log.h>
13+
#include <clc/math/clc_fabs.h>
14+
#include <clc/math/clc_fma.h>
15+
#include <clc/math/clc_log.h>
16+
#include <clc/math/clc_mad.h>
17+
#include <clc/math/clc_sqrt.h>
18+
#include <clc/math/math.h>
19+
#include <clc/relational/clc_isinf.h>
20+
#include <clc/relational/clc_isnan.h>
21+
#include <clc/relational/clc_select.h>
22+
23+
#define __CLC_BODY <clc_asinh.inc>
24+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)