Skip to content

Commit 910a4bf

Browse files
[compiler-rt] Implement __extendxftf2 and __trunctfxf2 for x86_64 (#66918)
This patch implements __extendxftf2 (long double -> f128) and __trunctfxf2 (f128 -> long double) on x86_64. This is preparation to unblock https://reviews.llvm.org/D53608. We intentionally do not modify compiler-rt/lib/builtins/fp_lib.h in this PR (in particular, to limit the scope and avoid exposing other functions on x86_64 in this PR). Instead, TODOs were added to use fp_lib.h once it is available. Test plan: 1. ninja check-compiler-rt (verified on x86_64 and on AArch64). In particular, new tests (extendxftf2_test.c and trunctfxf2_test.c) were added. 2. Compared the results of conversions with what other compilers (gcc) produce.
1 parent e35cb73 commit 910a4bf

21 files changed

+564
-151
lines changed

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ endif ()
280280
# long double is not 80 bits on Android or MSVC.
281281
set(x86_80_BIT_SOURCES
282282
divxc3.c
283+
extendxftf2.c
283284
fixxfdi.c
284285
fixxfti.c
285286
fixunsxfdi.c
@@ -291,6 +292,7 @@ set(x86_80_BIT_SOURCES
291292
floatuntixf.c
292293
mulxc3.c
293294
powixf2.c
295+
trunctfxf2.c
294296
)
295297

296298
if (NOT MSVC)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- lib/extendxftf2.c - long double -> quad conversion --------*- C -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Assumption: long double is an IEEE 80-bit floating point type padded to 128
10+
// bits.
11+
12+
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
13+
#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \
14+
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
15+
#define SRC_80
16+
#define DST_QUAD
17+
#include "fp_extend_impl.inc"
18+
19+
// Widens an 80-bit long double to a 128-bit __float128.
// All of the work is done by __extendXfYf2__ from fp_extend_impl.inc, which
// this file instantiates with SRC_80 and DST_QUAD defined before the include.
COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
20+
return __extendXfYf2__(a);
21+
}
22+
23+
#endif

compiler-rt/lib/builtins/fp_extend.h

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,22 @@
2020
typedef float src_t;
2121
typedef uint32_t src_rep_t;
2222
#define SRC_REP_C UINT32_C
23-
static const int srcSigBits = 23;
23+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
24+
static const int srcSigFracBits = 23;
25+
// -1 accounts for the sign bit.
26+
static const int srcExpBits = srcBits - srcSigFracBits - 1;
2427
#define src_rep_t_clz clzsi
2528

2629
#elif defined SRC_DOUBLE
2730
typedef double src_t;
2831
typedef uint64_t src_rep_t;
2932
#define SRC_REP_C UINT64_C
30-
static const int srcSigBits = 52;
31-
static __inline int src_rep_t_clz(src_rep_t a) {
33+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
34+
static const int srcSigFracBits = 52;
35+
// -1 accounts for the sign bit.
36+
static const int srcExpBits = srcBits - srcSigFracBits - 1;
37+
38+
static inline int src_rep_t_clz_impl(src_rep_t a) {
3239
#if defined __LP64__
3340
return __builtin_clzl(a);
3441
#else
@@ -38,6 +45,18 @@ static __inline int src_rep_t_clz(src_rep_t a) {
3845
return 32 + clzsi(a & REP_C(0xffffffff));
3946
#endif
4047
}
48+
#define src_rep_t_clz src_rep_t_clz_impl
49+
50+
#elif defined SRC_80
51+
typedef long double src_t;
52+
typedef __uint128_t src_rep_t;
53+
#define SRC_REP_C (__uint128_t)
54+
// sign bit, exponent and significand occupy the lower 80 bits.
55+
static const int srcBits = 80;
56+
static const int srcSigFracBits = 63;
57+
// -1 accounts for the sign bit.
58+
// -1 accounts for the explicitly stored integer bit.
59+
static const int srcExpBits = srcBits - srcSigFracBits - 1 - 1;
4160

4261
#elif defined SRC_HALF
4362
#ifdef COMPILER_RT_HAS_FLOAT16
@@ -47,7 +66,11 @@ typedef uint16_t src_t;
4766
#endif
4867
typedef uint16_t src_rep_t;
4968
#define SRC_REP_C UINT16_C
50-
static const int srcSigBits = 10;
69+
static const int srcBits = sizeof(src_t) * CHAR_BIT;
70+
static const int srcSigFracBits = 10;
71+
// -1 accounts for the sign bit.
72+
static const int srcExpBits = srcBits - srcSigFracBits - 1;
73+
5174
#define src_rep_t_clz __builtin_clz
5275

5376
#else
@@ -58,36 +81,83 @@ static const int srcSigBits = 10;
5881
typedef float dst_t;
5982
typedef uint32_t dst_rep_t;
6083
#define DST_REP_C UINT32_C
61-
static const int dstSigBits = 23;
84+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
85+
static const int dstSigFracBits = 23;
86+
// -1 accounts for the sign bit.
87+
static const int dstExpBits = dstBits - dstSigFracBits - 1;
6288

6389
#elif defined DST_DOUBLE
6490
typedef double dst_t;
6591
typedef uint64_t dst_rep_t;
6692
#define DST_REP_C UINT64_C
67-
static const int dstSigBits = 52;
93+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
94+
static const int dstSigFracBits = 52;
95+
// -1 accounts for the sign bit.
96+
static const int dstExpBits = dstBits - dstSigFracBits - 1;
6897

6998
#elif defined DST_QUAD
99+
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
100+
#if __LDBL_MANT_DIG__ == 113
70101
typedef long double dst_t;
102+
#elif defined(__x86_64__) && \
103+
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
104+
typedef __float128 dst_t;
105+
#endif
71106
typedef __uint128_t dst_rep_t;
72107
#define DST_REP_C (__uint128_t)
73-
static const int dstSigBits = 112;
108+
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
109+
static const int dstSigFracBits = 112;
110+
// -1 accounts for the sign bit.
111+
static const int dstExpBits = dstBits - dstSigFracBits - 1;
74112

75113
#else
76114
#error Destination should be single, double, or quad precision!
77115
#endif // end destination precision
78116

79-
// End of specialization parameters. Two helper routines for conversion to and
80-
// from the representation of floating-point data as integer values follow.
117+
// End of specialization parameters.
118+
119+
// TODO: These helper routines should be placed into fp_lib.h
120+
// Currently they depend on macros/constants defined above.
121+
122+
// Returns the sign of a source representation as 0 (positive) or 1 (negative).
// The sign lives in bit (srcBits - 1); any padding above it is ignored.
static inline src_rep_t extract_sign_from_src(src_rep_t x) {
  const int signShift = srcBits - 1;
  return (x >> signShift) & SRC_REP_C(1);
}
126+
127+
// Returns the biased exponent field of a source representation, moved down so
// that its least significant bit is bit 0.
static inline src_rep_t extract_exp_from_src(src_rep_t x) {
  // Everything below the exponent: the whole stored significand. For formats
  // with an explicit integer bit this is one more than srcSigFracBits.
  const int expShift = srcBits - 1 - srcExpBits;
  const src_rep_t expFieldMask = (SRC_REP_C(1) << srcExpBits) - 1;
  return (x >> expShift) & expFieldMask;
}
132+
133+
// Returns the fractional part of the significand: the low srcSigFracBits bits
// of the source representation. An explicitly stored integer bit, if the
// format has one, sits above this field and is therefore dropped.
static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
  return x & ((SRC_REP_C(1) << srcSigFracBits) - 1);
}
137+
138+
#ifdef src_rep_t_clz
// Counts the leading zero bits inside the significand-fraction field of a
// source value: 0 when the top fraction bit is set, srcSigFracBits - 1 when
// only the lowest fraction bit is set. sigFrac must be non-zero.
//
// src_rep_t_clz counts zeros across the whole integer it is handed, so the
// bits that lie above the fraction field have to be subtracted out.
static inline int clz_in_sig_frac(src_rep_t sigFrac) {
  // Width of the integer that src_rep_t_clz scans. The argument undergoes
  // integer promotion (a uint16_t rep is widened to int before reaching
  // __builtin_clz), so measure the promoted operand rather than src_rep_t.
  // The width must come from the source side: the destination type's size
  // has no bearing on how many zeros precede the source fraction.
  // NOTE(review): assumes clzsi scans exactly 32 bits -- confirm.
  const int clzBits = (int)(sizeof(+sigFrac) * CHAR_BIT);
  // Bits above the fraction field: padding beyond the source format, the
  // sign bit and the exponent field.
  const int skip = (clzBits - srcBits) + 1 + srcExpBits;
  return src_rep_t_clz(sigFrac) - skip;
}
#endif
144+
145+
// Assembles a destination representation from its three fields.
//   sign    - 0 or 1, placed in the top bit (dstBits - 1).
//   exp     - biased exponent, placed directly below the sign bit.
//   sigFrac - significand fraction, already aligned to bit 0.
// The fields are OR-ed together without masking, so callers must pass values
// that fit their field.
static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp,
                                          dst_rep_t sigFrac) {
  const int signShift = dstBits - 1;
  const int expShift = signShift - dstExpBits;
  dst_rep_t rep = sigFrac;
  rep |= exp << expShift;
  rep |= sign << signShift;
  return rep;
}
148+
149+
// Two helper routines for conversion to and from the representation of
150+
// floating-point data as integer values follow.
81151

82-
static __inline src_rep_t srcToRep(src_t x) {
152+
static inline src_rep_t srcToRep(src_t x) {
83153
const union {
84154
src_t f;
85155
src_rep_t i;
86156
} rep = {.f = x};
87157
return rep.i;
88158
}
89159

90-
static __inline dst_t dstFromRep(dst_rep_t x) {
160+
static inline dst_t dstFromRep(dst_rep_t x) {
91161
const union {
92162
dst_t f;
93163
dst_rep_t i;

compiler-rt/lib/builtins/fp_extend_impl.inc

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -37,71 +37,72 @@
3737

3838
#include "fp_extend.h"
3939

40+
// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
41+
// format. In particular, for the source type srcSigFracBits may not be equal to
42+
// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
43+
// types.
4044
static __inline dst_t __extendXfYf2__(src_t a) {
4145
// Various constants whose values follow from the type parameters.
4246
// Any reasonable optimizer will fold and propagate all of these.
43-
const int srcBits = sizeof(src_t) * CHAR_BIT;
44-
const int srcExpBits = srcBits - srcSigBits - 1;
4547
const int srcInfExp = (1 << srcExpBits) - 1;
4648
const int srcExpBias = srcInfExp >> 1;
4749

48-
const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
49-
const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
50-
const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
51-
const src_rep_t srcAbsMask = srcSignMask - 1;
52-
const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
53-
const src_rep_t srcNaNCode = srcQNaN - 1;
54-
55-
const int dstBits = sizeof(dst_t) * CHAR_BIT;
56-
const int dstExpBits = dstBits - dstSigBits - 1;
5750
const int dstInfExp = (1 << dstExpBits) - 1;
5851
const int dstExpBias = dstInfExp >> 1;
5952

60-
const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
61-
6253
// Break a into a sign and representation of the absolute value.
6354
const src_rep_t aRep = srcToRep(a);
64-
const src_rep_t aAbs = aRep & srcAbsMask;
65-
const src_rep_t sign = aRep & srcSignMask;
66-
dst_rep_t absResult;
55+
const src_rep_t srcSign = extract_sign_from_src(aRep);
56+
const src_rep_t srcExp = extract_exp_from_src(aRep);
57+
const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
58+
59+
dst_rep_t dstSign = srcSign;
60+
dst_rep_t dstExp;
61+
dst_rep_t dstSigFrac;
6762

68-
// If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
69-
// to (signed) int. To avoid that, explicitly cast to src_rep_t.
70-
if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
63+
if (srcExp >= 1 && srcExp < srcInfExp) {
7164
// a is a normal number.
72-
// Extend to the destination type by shifting the significand and
73-
// exponent into the proper position and rebiasing the exponent.
74-
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
75-
absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
65+
dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
66+
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
7667
}
7768

78-
else if (aAbs >= srcInfinity) {
69+
else if (srcExp == srcInfExp) {
7970
// a is NaN or infinity.
80-
// Conjure the result by beginning with infinity, then setting the qNaN
81-
// bit (if needed) and right-aligning the rest of the trailing NaN
82-
// payload field.
83-
absResult = (dst_rep_t)dstInfExp << dstSigBits;
84-
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
85-
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
71+
dstExp = dstInfExp;
72+
dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
8673
}
8774

88-
else if (aAbs) {
75+
else if (srcSigFrac) {
8976
// a is denormal.
90-
// renormalize the significand and clear the leading bit, then insert
91-
// the correct adjusted exponent in the destination type.
92-
const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
93-
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
94-
absResult ^= dstMinNormal;
95-
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
96-
absResult |= (dst_rep_t)resultExponent << dstSigBits;
77+
if (srcExpBits == dstExpBits) {
78+
// The exponent fields are identical and this is a denormal number, so all
79+
// the non-significand bits are zero. In particular, this branch is always
80+
// taken when we extend a denormal F80 to F128.
81+
dstExp = 0;
82+
dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
83+
} else {
84+
#ifndef src_rep_t_clz
85+
// If src_rep_t_clz is not defined this branch must be unreachable.
86+
__builtin_unreachable();
87+
#else
88+
// Renormalize the significand and clear the leading bit.
89+
// For F80 -> F128 this codepath is unused.
90+
const int scale = clz_in_sig_frac(srcSigFrac) + 1;
91+
dstExp = dstExpBias - srcExpBias - scale + 1;
92+
dstSigFrac = (dst_rep_t)srcSigFrac
93+
<< (dstSigFracBits - srcSigFracBits + scale);
94+
const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
95+
dstSigFrac ^= dstMinNormal;
96+
#endif
97+
}
9798
}
9899

99100
else {
100101
// a is zero.
101-
absResult = 0;
102+
dstExp = 0;
103+
dstSigFrac = 0;
102104
}
103105

104-
// Apply the signbit to the absolute value.
105-
const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
106+
const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
106107
return dstFromRep(result);
107108
}

0 commit comments

Comments
 (0)