@@ -398,9 +398,11 @@ where
398
398
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
399
399
let c_hw = F :: C_HW ;
400
400
401
+ // Check that the top bit is set, i.e. value is within `[1, 2)`.
401
402
debug_assert ! (
402
403
b_uq1_hw & HalfRep :: <F >:: ONE << ( HalfRep :: <F >:: BITS - 1 ) > HalfRep :: <F >:: ZERO
403
404
) ;
405
+
404
406
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
405
407
// so x0 fits to UQ0.HW without wrapping.
406
408
let mut x_uq0_hw: HalfRep < F > =
@@ -435,9 +437,8 @@ where
435
437
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
436
438
// expected to be strictly positive because b_UQ1_hw has its highest bit set
437
439
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
438
- let corr_uq1_hw: HalfRep < F > = zero
439
- . wrapping_sub ( ( F :: Int :: from ( x_uq0_hw) . wrapping_mul ( F :: Int :: from ( b_uq1_hw) ) ) >> hw)
440
- . cast ( ) ;
440
+ let corr_uq1_hw: HalfRep < F > =
441
+ HalfRep :: < F > :: ZERO . wrapping_sub ( x_uq0_hw. widen_mul ( b_uq1_hw) . hi ( ) ) ;
441
442
442
443
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
443
444
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
@@ -451,8 +452,7 @@ where
451
452
// The fact corr_UQ1_hw was virtually round up (due to result of
452
453
// multiplication being **first** truncated, then negated - to improve
453
454
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
454
- x_uq0_hw =
455
- ( F :: Int :: from ( x_uq0_hw) . wrapping_mul ( F :: Int :: from ( corr_uq1_hw) ) >> ( hw - 1 ) ) . cast ( ) ;
455
+ x_uq0_hw = ( x_uq0_hw. widen_mul ( corr_uq1_hw) >> ( hw - 1 ) ) . cast ( ) ;
456
456
457
457
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
458
458
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
0 commit comments