|
9 | 9 | #define uint32_lo(i) ((i) & 0xffff)
|
10 | 10 | #define uint32_hi(i) ((i) >> 16)
|
11 | 11 | #define uint32_carry16 ((1) << 16)
|
| 12 | +/* Check interval |
| 13 | + * For any variable interval checking: |
| 14 | + * if (x > minx - epsilon && x < minx + epsilon) ... |
| 15 | + * it is faster to use bit operation: |
| 16 | + * if ((signed)((x - (minx - epsilon)) | ((minx + epsilon) - x)) > 0) ... |
| 17 | + */ |
| 18 | +#define CHECK_INTERVAL(x, minx, epsilon) \ |
| 19 | + ((int32_t) ((x - (minx - epsilon)) | (minx + epsilon - x)) > 0) |
12 | 20 |
|
13 | 21 | twin_fixed_t twin_fixed_sqrt(twin_fixed_t a)
|
14 | 22 | {
|
15 |
| - twin_fixed_t max = a, min = 0; |
16 |
| - |
17 |
| - while (max > min) { |
18 |
| - twin_fixed_t mid = (max + min) >> 1; |
19 |
| - if (mid >= 181 * TWIN_FIXED_ONE) { |
20 |
| - max = mid - 1; |
21 |
| - continue; |
22 |
| - } |
23 |
| - twin_fixed_t sqr = twin_fixed_mul(mid, mid); |
24 |
| - if (sqr == a) |
25 |
| - return mid; |
26 |
| - if (sqr < a) |
27 |
| - min = mid + 1; |
28 |
| - else |
29 |
| - max = mid - 1; |
| 23 | + if (a <= 0) |
| 24 | + return 0; |
| 25 | + if (CHECK_INTERVAL(a, TWIN_FIXED_ONE, (1 << (8 - 1)))) |
| 26 | + return TWIN_FIXED_ONE; |
| 27 | + |
| 28 | + /* Count leading zero */ |
| 29 | + int offset = 0; |
| 30 | + for (twin_fixed_t i = a; !(0x40000000 & i); i <<= 1) |
| 31 | + ++offset; |
| 32 | + |
| 33 | + /* Shift left 'a' to expand more digit for sqrt precision */ |
| 34 | + offset &= ~1; |
| 35 | + a <<= offset; |
| 36 | + /* Calculate the digits need to shift back */ |
| 37 | + offset >>= 1; |
| 38 | + offset -= (16 >> 1); |
| 39 | + /* Use digit-by-digit calculation to compute square root */ |
| 40 | + twin_fixed_t z = 0; |
| 41 | + for (twin_fixed_t m = 1UL << ((31 - __builtin_clz(a)) & ~1UL); m; m >>= 2) { |
| 42 | + int b = z + m; |
| 43 | + z >>= 1; |
| 44 | + if (a >= b) |
| 45 | + a -= b, z += m; |
30 | 46 | }
|
31 |
| - return (max + min) >> 1; |
| 47 | + /* Shift back the expanded digits */ |
| 48 | + return (offset >= 0) ? z >> offset : z << (-offset); |
32 | 49 | }
|
33 | 50 |
|
34 | 51 | twin_sfixed_t _twin_sfixed_sqrt(twin_sfixed_t as)
|
35 | 52 | {
|
36 |
| - twin_dfixed_t max = as, min = 0; |
37 |
| - twin_dfixed_t a = twin_sfixed_to_dfixed(as); |
38 |
| - |
39 |
| - while (max > min) { |
40 |
| - twin_dfixed_t mid = (max + min) >> 1; |
41 |
| - twin_dfixed_t sqr = mid * mid; |
42 |
| - if (sqr == a) |
43 |
| - return (twin_sfixed_t) mid; |
44 |
| - if (sqr < a) |
45 |
| - min = mid + 1; |
46 |
| - else |
47 |
| - max = mid - 1; |
| 53 | + if (as <= 0) |
| 54 | + return 0; |
| 55 | + if (CHECK_INTERVAL(as, TWIN_SFIXED_ONE, (1 << (2 - 1)))) |
| 56 | + return TWIN_SFIXED_ONE; |
| 57 | + |
| 58 | + int offset = 0; |
| 59 | + for (twin_sfixed_t i = as; !(0x4000 & i); i <<= 1) |
| 60 | + ++offset; |
| 61 | + |
| 62 | + offset &= ~1; |
| 63 | + as <<= offset; |
| 64 | + |
| 65 | + offset >>= 1; |
| 66 | + offset -= (4 >> 1); |
| 67 | + |
| 68 | + twin_sfixed_t z = 0; |
| 69 | + for (twin_sfixed_t m = 1UL << ((31 - __builtin_clz(as)) & ~1UL); m; |
| 70 | + m >>= 2) { |
| 71 | + int16_t b = z + m; |
| 72 | + z >>= 1; |
| 73 | + if (as >= b) |
| 74 | + as -= b, z += m; |
48 | 75 | }
|
49 |
| - return (twin_sfixed_t) ((max + min) >> 1); |
| 76 | + |
| 77 | + return (offset >= 0) ? z >> offset : z << (-offset); |
50 | 78 | }
|
0 commit comments