2 * QEMU float support macros
4 * Derived from SoftFloat.
7 /*============================================================================
9 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
10 Arithmetic Package, Release 2b.
12 Written by John R. Hauser. This work was made possible in part by the
13 International Computer Science Institute, located at Suite 600, 1947 Center
14 Street, Berkeley, California 94704. Funding was partially provided by the
15 National Science Foundation under grant MIP-9311980. The original version
16 of this code was written as part of a project to build a fixed-point vector
17 processor in collaboration with the University of California at Berkeley,
18 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 arithmetic/SoftFloat.html'.
22 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
29 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
31 Derivative works are acceptable, even for commercial purposes, so long as
32 (1) the source code for the derivative work includes prominent notice that
33 the work is derivative, and (2) the source code includes prominent notice with
34 these four paragraphs for those parts of this code that are retained.
36 =============================================================================*/
38 /*----------------------------------------------------------------------------
39 | This macro tests for minimum version of the GNU C compiler.
40 *----------------------------------------------------------------------------*/
41 #if defined(__GNUC__) && defined(__GNUC_MINOR__)
42 # define SOFTFLOAT_GNUC_PREREQ(maj, min) \
43 ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
45 # define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
49 /*----------------------------------------------------------------------------
50 | Shifts `a' right by the number of bits given in `count'. If any nonzero
51 | bits are shifted off, they are ``jammed'' into the least significant bit of
52 | the result by setting the least significant bit to 1. The value of `count'
53 | can be arbitrarily large; in particular, if `count' is greater than 32, the
54 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
55 | The result is stored in the location pointed to by `zPtr'.
56 *----------------------------------------------------------------------------*/
58 static inline void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
65 else if ( count < 32 ) {
66 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
75 /*----------------------------------------------------------------------------
76 | Shifts `a' right by the number of bits given in `count'. If any nonzero
77 | bits are shifted off, they are ``jammed'' into the least significant bit of
78 | the result by setting the least significant bit to 1. The value of `count'
79 | can be arbitrarily large; in particular, if `count' is greater than 64, the
80 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
81 | The result is stored in the location pointed to by `zPtr'.
82 *----------------------------------------------------------------------------*/
84 static inline void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
91 else if ( count < 64 ) {
92 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
101 /*----------------------------------------------------------------------------
102 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
103 | _plus_ the number of bits given in `count'. The shifted result is at most
104 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
105 | bits shifted off form a second 64-bit result as follows: The _last_ bit
106 | shifted off is the most-significant bit of the extra result, and the other
107 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
108 | bits shifted off were all zero. This extra result is stored in the location
109 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
110 | (This routine makes more sense if `a0' and `a1' are considered to form
111 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
112 | point value is shifted right by the number of bits given in `count', and
113 | the integer part of the result is returned at the location pointed to by
114 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
115 | described above, and is returned at the location pointed to by `z1Ptr'.)
116 *----------------------------------------------------------------------------*/
119 shift64ExtraRightJamming(
120 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
123 int8 negCount = ( - count ) & 63;
129 else if ( count < 64 ) {
130 z1 = ( a0<<negCount ) | ( a1 != 0 );
135 z1 = a0 | ( a1 != 0 );
138 z1 = ( ( a0 | a1 ) != 0 );
147 /*----------------------------------------------------------------------------
148 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
149 | number of bits given in `count'. Any bits shifted off are lost. The value
150 | of `count' can be arbitrarily large; in particular, if `count' is greater
151 | than 128, the result will be 0. The result is broken into two 64-bit pieces
152 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
153 *----------------------------------------------------------------------------*/
157 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
160 int8 negCount = ( - count ) & 63;
166 else if ( count < 64 ) {
167 z1 = ( a0<<negCount ) | ( a1>>count );
171 z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
179 /*----------------------------------------------------------------------------
180 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
181 | number of bits given in `count'. If any nonzero bits are shifted off, they
182 | are ``jammed'' into the least significant bit of the result by setting the
183 | least significant bit to 1. The value of `count' can be arbitrarily large;
184 | in particular, if `count' is greater than 128, the result will be either
185 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
186 | nonzero. The result is broken into two 64-bit pieces which are stored at
187 | the locations pointed to by `z0Ptr' and `z1Ptr'.
188 *----------------------------------------------------------------------------*/
191 shift128RightJamming(
192 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
195 int8 negCount = ( - count ) & 63;
201 else if ( count < 64 ) {
202 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
207 z1 = a0 | ( a1 != 0 );
209 else if ( count < 128 ) {
210 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
213 z1 = ( ( a0 | a1 ) != 0 );
222 /*----------------------------------------------------------------------------
223 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
224 | by 64 _plus_ the number of bits given in `count'. The shifted result is
225 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
226 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
227 | off form a third 64-bit result as follows: The _last_ bit shifted off is
228 | the most-significant bit of the extra result, and the other 63 bits of the
229 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
230 | were all zero. This extra result is stored in the location pointed to by
231 | `z2Ptr'. The value of `count' can be arbitrarily large.
232 | (This routine makes more sense if `a0', `a1', and `a2' are considered
233 | to form a fixed-point value with binary point between `a1' and `a2'. This
234 | fixed-point value is shifted right by the number of bits given in `count',
235 | and the integer part of the result is returned at the locations pointed to
236 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
237 | corrupted as described above, and is returned at the location pointed to by
239 *----------------------------------------------------------------------------*/
242 shift128ExtraRightJamming(
253 int8 negCount = ( - count ) & 63;
263 z1 = ( a0<<negCount ) | ( a1>>count );
275 z1 = a0>>( count & 63 );
278 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
292 /*----------------------------------------------------------------------------
293 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
294 | number of bits given in `count'. Any bits shifted off are lost. The value
295 | of `count' must be less than 64. The result is broken into two 64-bit
296 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
297 *----------------------------------------------------------------------------*/
301 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
306 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
310 /*----------------------------------------------------------------------------
311 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
312 | by the number of bits given in `count'. Any bits shifted off are lost.
313 | The value of `count' must be less than 64. The result is broken into three
314 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
315 | `z1Ptr', and `z2Ptr'.
316 *----------------------------------------------------------------------------*/
336 negCount = ( ( - count ) & 63 );
346 /*----------------------------------------------------------------------------
347 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
348 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
349 | any carry out is lost. The result is broken into two 64-bit pieces which
350 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
351 *----------------------------------------------------------------------------*/
355 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
361 *z0Ptr = a0 + b0 + ( z1 < a1 );
365 /*----------------------------------------------------------------------------
366 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
367 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
368 | modulo 2^192, so any carry out is lost. The result is broken into three
369 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
370 | `z1Ptr', and `z2Ptr'.
371 *----------------------------------------------------------------------------*/
390 carry1 = ( z2 < a2 );
392 carry0 = ( z1 < a1 );
395 z0 += ( z1 < carry1 );
403 /*----------------------------------------------------------------------------
404 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
405 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
406 | 2^128, so any borrow out (carry out) is lost. The result is broken into two
407 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
409 *----------------------------------------------------------------------------*/
413 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
417 *z0Ptr = a0 - b0 - ( a1 < b1 );
421 /*----------------------------------------------------------------------------
422 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
423 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
424 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
425 | result is broken into three 64-bit pieces which are stored at the locations
426 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
427 *----------------------------------------------------------------------------*/
443 int8 borrow0, borrow1;
446 borrow1 = ( a2 < b2 );
448 borrow0 = ( a1 < b1 );
450 z0 -= ( z1 < borrow1 );
459 /*----------------------------------------------------------------------------
460 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
461 | into two 64-bit pieces which are stored at the locations pointed to by
462 | `z0Ptr' and `z1Ptr'.
463 *----------------------------------------------------------------------------*/
465 static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
467 uint32_t aHigh, aLow, bHigh, bLow;
468 uint64_t z0, zMiddleA, zMiddleB, z1;
474 z1 = ( (uint64_t) aLow ) * bLow;
475 zMiddleA = ( (uint64_t) aLow ) * bHigh;
476 zMiddleB = ( (uint64_t) aHigh ) * bLow;
477 z0 = ( (uint64_t) aHigh ) * bHigh;
478 zMiddleA += zMiddleB;
479 z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
482 z0 += ( z1 < zMiddleA );
488 /*----------------------------------------------------------------------------
489 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
490 | `b' to obtain a 192-bit product. The product is broken into three 64-bit
491 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
493 *----------------------------------------------------------------------------*/
505 uint64_t z0, z1, z2, more1;
507 mul64To128( a1, b, &z1, &z2 );
508 mul64To128( a0, b, &z0, &more1 );
509 add128( z0, more1, 0, z1, &z0, &z1 );
516 /*----------------------------------------------------------------------------
517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
518 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519 | product. The product is broken into four 64-bit pieces which are stored at
520 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521 *----------------------------------------------------------------------------*/
535 uint64_t z0, z1, z2, z3;
536 uint64_t more1, more2;
538 mul64To128( a1, b1, &z2, &z3 );
539 mul64To128( a1, b0, &z1, &more2 );
540 add128( z1, more2, 0, z2, &z1, &z2 );
541 mul64To128( a0, b0, &z0, &more1 );
542 add128( z0, more1, 0, z1, &z0, &z1 );
543 mul64To128( a0, b1, &more1, &more2 );
544 add128( more1, more2, 0, z2, &more1, &z2 );
545 add128( z0, z1, 0, more1, &z0, &z1 );
553 /*----------------------------------------------------------------------------
554 | Returns an approximation to the 64-bit integer quotient obtained by dividing
555 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
556 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
557 | toward zero, the approximation returned lies between q and q + 2 inclusive.
558 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559 | unsigned integer is returned.
560 *----------------------------------------------------------------------------*/
562 static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
565 uint64_t rem0, rem1, term0, term1;
568 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
570 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
571 mul64To128( b, z, &term0, &term1 );
572 sub128( a0, a1, term0, term1, &rem0, &rem1 );
573 while ( ( (int64_t) rem0 ) < 0 ) {
574 z -= LIT64( 0x100000000 );
576 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
578 rem0 = ( rem0<<32 ) | ( rem1>>32 );
579 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
584 /*----------------------------------------------------------------------------
585 | Returns an approximation to the square root of the 32-bit significand given
586 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
587 | `aExp' (the least significant bit) is 1, the integer returned approximates
588 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
589 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
590 | case, the approximation returned lies strictly within +/-2 of the exact
592 *----------------------------------------------------------------------------*/
594 static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a)
596 static const uint16_t sqrtOddAdjustments[] = {
597 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
598 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
600 static const uint16_t sqrtEvenAdjustments[] = {
601 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
602 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
607 index = ( a>>27 ) & 15;
609 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
610 z = ( ( a / z )<<14 ) + ( z<<15 );
614 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
616 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
617 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
619 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
623 /*----------------------------------------------------------------------------
624 | Returns the number of leading 0 bits before the most-significant 1 bit of
625 | `a'. If `a' is zero, 32 is returned.
626 *----------------------------------------------------------------------------*/
628 static int8 countLeadingZeros32( uint32_t a )
630 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
632 return __builtin_clz(a);
637 static const int8 countLeadingZerosHigh[] = {
638 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
639 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
640 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
641 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
642 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
645 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
662 if ( a < 0x1000000 ) {
666 shiftCount += countLeadingZerosHigh[ a>>24 ];
671 /*----------------------------------------------------------------------------
672 | Returns the number of leading 0 bits before the most-significant 1 bit of
673 | `a'. If `a' is zero, 64 is returned.
674 *----------------------------------------------------------------------------*/
676 static int8 countLeadingZeros64( uint64_t a )
678 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
680 return __builtin_clzll(a);
688 if ( a < ( (uint64_t) 1 )<<32 ) {
694 shiftCount += countLeadingZeros32( a );
699 /*----------------------------------------------------------------------------
700 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
701 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
702 | Otherwise, returns 0.
703 *----------------------------------------------------------------------------*/
705 static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
708 return ( a0 == b0 ) && ( a1 == b1 );
712 /*----------------------------------------------------------------------------
713 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
714 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
715 | Otherwise, returns 0.
716 *----------------------------------------------------------------------------*/
718 static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
721 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
725 /*----------------------------------------------------------------------------
726 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
727 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
729 *----------------------------------------------------------------------------*/
731 static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
734 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
738 /*----------------------------------------------------------------------------
739 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
740 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
741 | Otherwise, returns 0.
742 *----------------------------------------------------------------------------*/
744 static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
747 return ( a0 != b0 ) || ( a1 != b1 );