Skip to content

Commit 3deff84

Browse files
committed
Correctly rounded floating point div_euclid.
Fixes #107904.
1 parent 33c245b commit 3deff84

File tree

19 files changed

+1367
-65
lines changed

19 files changed

+1367
-65
lines changed

library/std/src/f128.rs

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
//!
55
//! Mathematically significant numbers are provided in the `consts` sub-module.
66
7+
mod div_euclid;
8+
mod u256;
9+
710
#[cfg(test)]
811
mod tests;
912

@@ -235,10 +238,14 @@ impl f128 {
235238

236239
/// Calculates Euclidean division, the matching method for `rem_euclid`.
237240
///
238-
/// This computes the integer `n` such that
241+
/// In infinite precision this computes the integer `n` such that
239242
/// `self = n * rhs + self.rem_euclid(rhs)`.
240-
/// In other words, the result is `self / rhs` rounded to the integer `n`
241-
/// such that `self >= n * rhs`.
243+
/// In other words, the result is `self / rhs` rounded to the
244+
/// integer `n` such that `self >= n * rhs`.
245+
///
246+
/// However, due to a floating point round-off error the result can be rounded if
247+
/// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
248+
/// exactly.
242249
///
243250
/// # Precision
244251
///
@@ -264,29 +271,23 @@ impl f128 {
264271
#[unstable(feature = "f128", issue = "116909")]
265272
#[must_use = "method returns a new number and does not mutate the original value"]
266273
pub fn div_euclid(self, rhs: f128) -> f128 {
267-
let q = (self / rhs).trunc();
268-
if self % rhs < 0.0 {
269-
return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
270-
}
271-
q
274+
div_euclid::div_euclid(self, rhs)
272275
}
273276

274277
/// Calculates the least nonnegative remainder of `self (mod rhs)`.
275278
///
276-
/// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
277-
/// most cases. However, due to a floating point round-off error it can
278-
/// result in `r == rhs.abs()`, violating the mathematical definition, if
279-
/// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
280-
/// This result is not an element of the function's codomain, but it is the
281-
/// closest floating point number in the real numbers and thus fulfills the
282-
/// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
283-
/// approximately.
279+
/// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
280+
///
281+
/// However, due to a floating point round-off error the result can round to
282+
/// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
284283
///
285284
/// # Precision
286285
///
287286
/// The result of this operation is guaranteed to be the rounded
288287
/// infinite-precision result.
289288
///
289+
/// The result is precise when `self >= 0.0`.
290+
///
290291
/// # Examples
291292
///
292293
/// ```

library/std/src/f128/div_euclid.rs

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#[cfg(test)]
2+
mod tests;
3+
4+
use crate::f128::u256::U256;
5+
6+
/// Software implementation of `f128::div_euclid`.
7+
#[allow(dead_code)]
8+
pub(crate) fn div_euclid(a: f128, b: f128) -> f128 {
9+
if let Some((a_neg, a_exp, a_m)) = normal_form(a)
10+
&& let Some((b_neg, b_exp, b_m)) = normal_form(b)
11+
{
12+
let exp = a_exp - b_exp;
13+
match (a_neg, b_neg) {
14+
(false, false) => div_floor(exp, a_m, b_m),
15+
(true, false) => -div_ceil(exp, a_m, b_m),
16+
(false, true) => -div_floor(exp, a_m, b_m),
17+
(true, true) => div_ceil(exp, a_m, b_m),
18+
}
19+
} else {
20+
// `a` or `b` are +-0.0 or infinity or NaN.
21+
// `a / b` is also +-0.0 or infinity or NaN.
22+
// There is no need to round to an integer.
23+
a / b
24+
}
25+
}
26+
27+
/// Returns `floor((a << exp) / b)`.
28+
///
29+
/// Requires `2^112 <= a, b < 2^113`.
30+
fn div_floor(exp: i32, a: u128, b: u128) -> f128 {
31+
if exp < 0 {
32+
0.0
33+
} else if exp <= 15 {
34+
// aa < (2^113 << 15) = 2^128
35+
let aa = a << exp;
36+
// q < 2^128 / 2^112 = 2^16
37+
let q = (aa / b) as u32;
38+
// We have to use `as` because `From<u32> for f128` is not yet implemented.
39+
q as f128
40+
} else if exp <= 127 {
41+
// aa = a << exp
42+
// aa < (2^113 << 127) = 2^240
43+
let aa = U256::shl_u128(a, exp as u32);
44+
// q < 2^240 / 2^112 = 2^128
45+
let (q, _) = aa.div_rem(b);
46+
q as f128
47+
} else {
48+
// aa >= (2^112 << 127) = 2^239
49+
// aa < (2^113 << 127) = 2^240
50+
let aa = U256::shl_u128(a, 127);
51+
// e > 0
52+
// The result is floor((aa << e) / b).
53+
let e = (exp - 127) as u32;
54+
55+
// aa = q * b + r
56+
// q >= 2^239 / 2^113 = 2^126
57+
// q < 2^239 / 2^112 = 2^128
58+
// 0 <= r < b
59+
let (q, r) = aa.div_rem(b);
60+
61+
// result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
62+
// 0 <= (r << e) / b < 2^e
63+
//
64+
// There are two cases:
65+
// 1. floor((r << e) / b) = 0
66+
// 2. 0 < floor((r << e) / b) < 2^e
67+
//
68+
// In case 1:
69+
// The result is q << e.
70+
//
71+
// In case 2:
72+
// The result is (q << e) + non-zero low e bits.
73+
// This rounds the same way as (q | 1) << e because rounding beyond
74+
// the 25 most significant bits of q depends only on whether the low-order
75+
// bits are non-zero.
76+
//
77+
// Case 1 happens when:
78+
// (r << e) / b < 1
79+
// (r << e) <= b - 1
80+
// r <= ((b - 1) >> e)
81+
let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 };
82+
let q_adj = if r <= case_1_bound {
83+
// Case 1.
84+
q
85+
} else {
86+
// Case 2.
87+
q | 1
88+
};
89+
q_adj as f128 * pow2(e)
90+
}
91+
}
92+
93+
/// Returns `ceil((a << exp) / b)`.
94+
///
95+
/// Requires `2^112 <= a, b < 2^113`.
96+
fn div_ceil(exp: i32, a: u128, b: u128) -> f128 {
97+
if exp < 0 {
98+
1.0
99+
} else if exp <= 15 {
100+
// aa < (2^113 << 15) = 2^128
101+
let aa = a << exp;
102+
// q < 2^128 / 2^112 + 1 = 2^16 + 1
103+
let q = ((aa - 1) / b) as u32 + 1;
104+
// We have to use `as` because `From<u32> for f128` is not yet implemented.
105+
q as f128
106+
} else if exp <= 127 {
107+
// aa = a << exp
108+
// aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
109+
let aa = U256::shl_u128(a, exp as u32);
110+
// q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1
111+
let (q, _) = (aa - U256::ONE).div_rem(b);
112+
(q + 1) as f128
113+
} else {
114+
// aa >= (2^112 << 127) = 2^239
115+
// aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
116+
let aa = U256::shl_u128(a, 127);
117+
// e > 0
118+
// The result is ceil((aa << e) / b).
119+
let e = (exp - 127) as u32;
120+
121+
// aa = q * b + r
122+
// q >= 2^239 / 2^112 = 2^126
123+
// q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15
124+
// 0 <= r < b
125+
let (q, r) = aa.div_rem(b);
126+
127+
// result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
128+
// 0 <= (r << e) / b < 2^e
129+
//
130+
// There are three cases:
131+
// 1. ceil((r << e) / b) = 0
132+
// 2. 0 < ceil((r << e) / b) < 2^e
133+
// 3. ceil((r << e) / b) = 2^e
134+
//
135+
// In case 1:
136+
// The result is q << e.
137+
//
138+
// In case 2:
139+
// The result is (q << e) + non-zero low e bits.
140+
// This rounds the same way as (q | 1) << e because rounding beyond
141+
// the 54 most significant bits of q depends only on whether the low-order
142+
// bits are non-zero.
143+
//
144+
// In case 3:
145+
// The result is (q + 1) << e.
146+
//
147+
// Case 1 happens when r = 0.
148+
// Case 3 happens when:
149+
// (r << e) / b > (1 << e) - 1
150+
// (r << e) > (b << e) - b
151+
// ((b - r) << e) <= b - 1
152+
// b - r <= (b - 1) >> e
153+
// r >= b - ((b - 1) >> e)
154+
let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 };
155+
let q_adj = if r == 0 {
156+
// Case 1.
157+
q
158+
} else if r < case_3_bound {
159+
// Case 2.
160+
q | 1
161+
} else {
162+
// Case 3.
163+
q + 1
164+
};
165+
q_adj as f128 * pow2(e)
166+
}
167+
}
168+
169+
/// For finite, non-zero numbers returns (sign, exponent, mantissa).
170+
///
171+
/// `x = (-1)^sign * 2^exp * mantissa`
172+
///
173+
/// `2^112 <= mantissa < 2^113`
174+
fn normal_form(x: f128) -> Option<(bool, i32, u128)> {
175+
let bits = x.to_bits();
176+
let sign = bits >> 127 != 0;
177+
let biased_exponent = (bits >> 112 & 0x7fff) as i32;
178+
let significand = bits & ((1 << 112) - 1);
179+
match biased_exponent {
180+
0 if significand == 0 => {
181+
// 0.0
182+
None
183+
}
184+
0 => {
185+
// Subnormal number: 2^(-16382-112) * significand.
186+
// We want mantissa to have exactly 15 leading zeros.
187+
let shift = significand.leading_zeros() - 15;
188+
Some((sign, -16382 - 112 - shift as i32, significand << shift))
189+
}
190+
0x7fff => {
191+
// Infinity or NaN.
192+
None
193+
}
194+
_ => {
195+
// Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand)
196+
Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand))
197+
}
198+
}
199+
}
200+
201+
/// Returns `2^exp`.
202+
fn pow2(exp: u32) -> f128 {
203+
if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY }
204+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#![cfg(reliable_f128_math)]
2+
3+
#[test]
4+
fn test_normal_form() {
5+
use crate::f128::div_euclid::normal_form;
6+
7+
assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111)));
8+
assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112)));
9+
assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112)));
10+
assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1)));
11+
assert_eq!(normal_form(0.0), None);
12+
assert_eq!(normal_form(f128::INFINITY), None);
13+
assert_eq!(normal_form(f128::NAN), None);
14+
}
15+
16+
#[test]
17+
fn test_pow2() {
18+
use crate::f128::div_euclid::pow2;
19+
20+
assert_eq!(pow2(0), 1.0f128);
21+
assert_eq!(pow2(4), 16.0f128);
22+
assert_eq!(pow2(16384), f128::INFINITY);
23+
}

0 commit comments

Comments
 (0)