gcc/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
   36    UDWtype -- An unsigned type, at least twice as large as UWtype
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
   46 #define __BITS4 (W_TYPE_SIZE / 4)  /* A quarter of the word width, in bits.  */
   47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))  /* 2^(W_TYPE_SIZE/2), the half-word base.  */
   48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))  /* Low half-word of T.  */
   49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))  /* High half-word of T.  */
  50
   51 #ifndef W_TYPE_SIZE
      /* Defaults used when the includer did not configure the word types:
         a 32-bit word (USItype) with UDItype as the double-word type.  */
   52 #define W_TYPE_SIZE     32
   53 #define UWtype          USItype
   54 #define UHWtype         USItype
   55 #define UDWtype         UDItype
   56 #endif
  57
  58 /* Used in glibc only.  */
   59 #ifndef attribute_hidden
   60 #define attribute_hidden  /* Expands to nothing unless already defined by the includer.  */
   61 #endif
   62
      /* 256-entry byte table, defined elsewhere.  In this file it is indexed
         by the non-CIX Alpha count_leading_zeros.  */
   63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If, in addition, the most
  79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  80    UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
  94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
  96    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  98    (i.e. carry out) is not stored anywhere, and is lost.
  99
 100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
  102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
  103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 104    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
 118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 119    understood by gcc1.  Use cpp to avoid major code duplication.  */
  120 #if __GNUC__ < 2
      /* gcc1: both macros expand to nothing, so no clobber list is emitted.  */
  121 #define __CLOBBER_CC
  122 #define __AND_CLOBBER_CC
  123 #else /* __GNUC__ >= 2 */
      /* __CLOBBER_CC opens a clobber list holding only "cc";
         __AND_CLOBBER_CC appends "cc" to a clobber list that already has
         at least one entry.  */
  124 #define __CLOBBER_CC : "cc"
  125 #define __AND_CLOBBER_CC , "cc"
  126 #endif /* __GNUC__ < 2 */
 127
  128 #if defined (__alpha) && W_TYPE_SIZE == 64
      /* Alpha with a 64-bit word.  umul_ppmm copies both operands into
         locals (so each argument is evaluated once), takes the high word
         from __builtin_alpha_umulh and the low word from a plain multiply.  */
  129 #define umul_ppmm(ph, pl, m0, m1) \
  130   do {                                                                  \
  131     UDItype __m0 = (m0), __m1 = (m1);                                   \
  132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
  133     (pl) = __m0 * __m1;                                                 \
  134   } while (0)
  135 #define UMUL_TIME 46
  136 #ifndef LONGLONG_STANDALONE
      /* Division is delegated to the external __udiv_qrnnd, which returns
         the quotient and stores the remainder through its first argument.
         Not available when LONGLONG_STANDALONE is defined.  */
  137 #define udiv_qrnnd(q, r, n1, n0, d) \
  138   do { UDItype __r;                                                     \
  139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
  140     (r) = __r;                                                          \
  141   } while (0)
  142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
  143 #define UDIV_TIME 220
  144 #endif /* LONGLONG_STANDALONE */
  145 #ifdef __alpha_cix__
      /* With __alpha_cix__ defined the compiler builtins are used directly.  */
  146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
  147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
  148 #define COUNT_LEADING_ZEROS_0 64
  149 #else
      /* Without CIX: __a is a byte index obtained from the cmpbge mask via
         __clz_tab, __t is then the byte extracted at that index, and the
         result combines the table entry for that byte with 8 * __a.  */
  150 #define count_leading_zeros(COUNT,X) \
  151   do {                                                                  \
  152     UDItype __xr = (X), __t, __a;                                       \
  153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
  155     __t = __builtin_alpha_extbl (__xr, __a);                            \
  156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  157   } while (0)
      /* Trailing zeros: "t & -t" isolates the lowest set bit, first of the
         inverted cmpbge mask and then of the selected byte.  For such a
         single-bit value the 0xAA, 0xCC and 0xF0 tests yield bits 0, 1 and
         2 of the bit's index.  The byte index is scaled by 8 (<<= 3) before
         the bit index within the byte is added.  */
  158 #define count_trailing_zeros(COUNT,X) \
  159   do {                                                                  \
  160     UDItype __xr = (X), __t, __a;                                       \
  161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  162     __t = ~__t & -~__t;                                                 \
  163     __a = ((__t & 0xCC) != 0) * 2;                                      \
  164     __a += ((__t & 0xF0) != 0) * 4;                                     \
  165     __a += ((__t & 0xAA) != 0);                                         \
  166     __t = __builtin_alpha_extbl (__xr, __a);                            \
  167     __a <<= 3;                                                          \
  168     __t &= -__t;                                                        \
  169     __a += ((__t & 0xCC) != 0) * 2;                                     \
  170     __a += ((__t & 0xF0) != 0) * 4;                                     \
  171     __a += ((__t & 0xAA) != 0);                                         \
  172     (COUNT) = __a;                                                      \
  173   } while (0)
  174 #endif /* __alpha_cix__ */
  175 #endif /* __alpha */
 176
  177 #if defined (__arc__) && W_TYPE_SIZE == 32
      /* ARC, 32-bit word.  add_ssaaaa: add.f combines the low words
         (%4, %5 -> %1) and adc then combines the high words (%2, %3 -> %0).
         The low result is early-clobber ("=&r") because it is written
         before the high inputs are read.
         NOTE(review): unlike the ARM variant in this file, no "cc" clobber
         is declared here -- confirm this is intentional.  */
  178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
  180            : "=r" ((USItype) (sh)),                                     \
  181              "=&r" ((USItype) (sl))                                     \
  182            : "%r" ((USItype) (ah)),                                     \
  183              "rIJ" ((USItype) (bh)),                                    \
  184              "%r" ((USItype) (al)),                                     \
  185              "rIJ" ((USItype) (bl)))
      /* sub_ddmmss: same operand layout, using sub.f / sbc.  The "%"
         (commutative) constraint modifier is dropped because subtraction
         operands cannot be swapped.  */
  186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
  188            : "=r" ((USItype) (sh)),                                     \
  189              "=&r" ((USItype) (sl))                                     \
  190            : "r" ((USItype) (ah)),                                      \
  191              "rIJ" ((USItype) (bh)),                                    \
  192              "r" ((USItype) (al)),                                      \
  193              "rIJ" ((USItype) (bl)))
  194 /* Call libgcc routine.  */
      /* The double-word result of __umulsidi3 is split through a DWunion
         (type declared outside this file) into its high and low words.  */
  195 #define umul_ppmm(w1, w0, u, v) \
  196 do {                                                                    \
  197   DWunion __w;                                                          \
  198   __w.ll = __umulsidi3 (u, v);                                          \
  199   w1 = __w.s.high;                                                      \
  200   w0 = __w.s.low;                                                       \
  201 } while (0)
      /* Self-referential define: presumably lets later "#if defined
         (__umulsidi3)" tests see the function as provided -- confirm
         against the generic fallbacks further down the file.  */
  202 #define __umulsidi3 __umulsidi3
  203 UDItype __umulsidi3 (USItype, USItype);
  204 #endif
 205
 206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
 207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 209            : "=r" ((USItype) (sh)),                                     \
 210              "=&r" ((USItype) (sl))                                     \
 211            : "%r" ((USItype) (ah)),                                     \
 212              "rI" ((USItype) (bh)),                                     \
 213              "%r" ((USItype) (al)),                                     \
 214              "rI" ((USItype) (bl)) __CLOBBER_CC)
 215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 217            : "=r" ((USItype) (sh)),                                     \
 218              "=&r" ((USItype) (sl))                                     \
 219            : "r" ((USItype) (ah)),                                      \
 220              "rI" ((USItype) (bh)),                                     \
 221              "r" ((USItype) (al)),                                      \
 222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 #define umul_ppmm(xh, xl, a, b) \
 224 {register USItype __t0, __t1, __t2;                                     \
 225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 226            "    mov     %2, %5, lsr #16\n"                              \
 227            "    mov     %0, %6, lsr #16\n"                              \
 228            "    bic     %3, %5, %2, lsl #16\n"                          \
 229            "    bic     %4, %6, %0, lsl #16\n"                          \
 230            "    mul     %1, %3, %4\n"                                   \
 231            "    mul     %4, %2, %4\n"                                   \
 232            "    mul     %3, %0, %3\n"                                   \
 233            "    mul     %0, %2, %0\n"                                   \
 234            "    adds    %3, %4, %3\n"                                   \
 235            "    addcs   %0, %0, #65536\n"                               \
 236            "    adds    %1, %1, %3, lsl #16\n"                          \
 237            "    adc     %0, %0, %3, lsr #16"                            \
 238            : "=&r" ((USItype) (xh)),                                    \
 239              "=r" ((USItype) (xl)),                                     \
 240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 241            : "r" ((USItype) (a)),                                       \
 242              "r" ((USItype) (b)) __CLOBBER_CC );}
 243 #define UMUL_TIME 20
 244 #define UDIV_TIME 100
 245 #endif /* __arm__ */
 246
  247 #if defined(__arm__)
  248 /* Let gcc decide how best to implement count_leading_zeros.  */
      /* Applies to every __arm__ build (no Thumb or W_TYPE_SIZE test here).
         COUNT_LEADING_ZEROS_0 advertises 32 as the result for X == 0.  */
  249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
  250 #define COUNT_LEADING_ZEROS_0 32
  251 #endif
 252
  253 #if defined (__AVR__) && W_TYPE_SIZE == 32
      /* AVR, 32-bit word: the "long" builtin variants are used for both
         counts; COUNT_LEADING_ZEROS_0 advertises 32 for X == 0.  */
  254 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
  255 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
  256 #define COUNT_LEADING_ZEROS_0 32
  257 #endif /* defined (__AVR__) && W_TYPE_SIZE == 32 */
 258
  259 #if defined (__CRIS__) && __CRIS_arch_version >= 3
      /* CRIS: count_leading_zeros is provided from arch version 3 on,
         count_trailing_zeros only from arch version 8 on.  */
  260 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
  261 #if __CRIS_arch_version >= 8
  262 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
  263 #endif
  264 #endif /* __CRIS__ */
 265
  266 #if defined (__hppa) && W_TYPE_SIZE == 32
      /* HP PA-RISC, 32-bit word.  add_ssaaaa: "add" combines the low words
         (%4, %5 -> %1), "addc" then combines the high words (%2, %3 -> %0).
         The low result is early-clobber because it is written before the
         high inputs are read.  */
  267 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  268   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
  269            : "=r" ((USItype) (sh)),                                     \
  270              "=&r" ((USItype) (sl))                                     \
  271            : "%rM" ((USItype) (ah)),                                    \
  272              "rM" ((USItype) (bh)),                                     \
  273              "%rM" ((USItype) (al)),                                    \
  274              "rM" ((USItype) (bl)))
      /* sub_ddmmss: same operand layout using sub / subb; the inputs are
         not marked commutative.  */
  275 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  276   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
  277            : "=r" ((USItype) (sh)),                                     \
  278              "=&r" ((USItype) (sl))                                     \
  279            : "rM" ((USItype) (ah)),                                     \
  280              "rM" ((USItype) (bh)),                                     \
  281              "rM" ((USItype) (al)),                                     \
  282              "rM" ((USItype) (bl)))
  283 #if defined (_PA_RISC1_1)
      /* umul_ppmm is only defined for _PA_RISC1_1.  xmpyu writes a UDItype
         result into an "x"-constrained operand; the union reads it back as
         two USItype halves, __w1 (stored first) and __w0.  */
  284 #define umul_ppmm(w1, w0, u, v) \
  285   do {                                                                  \
  286     union                                                               \
  287       {                                                                 \
  288         UDItype __f;                                                    \
  289         struct {USItype __w1, __w0;} __w1w0;                            \
  290       } __t;                                                            \
  291     __asm__ ("xmpyu %1,%2,%0"                                           \
  292              : "=x" (__t.__f)                                           \
  293              : "x" ((USItype) (u)),                                     \
  294                "x" ((USItype) (v)));                                    \
  295     (w1) = __t.__w1w0.__w1;                                             \
  296     (w0) = __t.__w1w0.__w0;                                             \
  297      } while (0)
  298 #define UMUL_TIME 8
  299 #else
  300 #define UMUL_TIME 30
  301 #endif
  302 #define UDIV_TIME 40
      /* count_leading_zeros: binary search as described by the inline asm
         comments.  The count starts at 1; at each step (16, 8, 4, 2 bits)
         either the step is added to the count or the value is shifted
         down, and the final extracted bit is subtracted.  X is tied to
         __tmp through the "1" constraint, so the asm works on that
         temporary operand.  */
  303 #define count_leading_zeros(count, x) \
  304   do {                                                                  \
  305     USItype __tmp;                                                      \
  306     __asm__ (                                                           \
  307        "ldi             1,%0\n"                                         \
  308 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
  309 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
  310 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
  311 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
  312 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
  313 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
  314 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
  315 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
  316 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
  317 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
  318 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
  319 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
  320 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
  321 "       sub             %0,%1,%0                ; Subtract it.\n"       \
  322         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  323   } while (0)
  324 #endif
 325
 326 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
 327 #if !defined (__zarch__)
 328 #define smul_ppmm(xh, xl, m0, m1) \
 329   do {                                                                  \
 330     union {DItype __ll;                                                 \
 331            struct {USItype __h, __l;} __i;                              \
 332           } __x;                                                        \
 333     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
 334              : "=&r" (__x.__ll)                                         \
 335              : "r" (m0), "r" (m1));                                     \
 336     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
 337   } while (0)
 338 #define sdiv_qrnnd(q, r, n1, n0, d) \
 339   do {                                                                  \
 340     union {DItype __ll;                                                 \
 341            struct {USItype __h, __l;} __i;                              \
 342           } __x;                                                        \
 343     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
 344     __asm__ ("dr %0,%2"                                                 \
 345              : "=r" (__x.__ll)                                          \
 346              : "0" (__x.__ll), "r" (d));                                \
 347     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
 348   } while (0)
 349 #else
 350 #define smul_ppmm(xh, xl, m0, m1) \
 351   do {                                                                  \
 352     register SItype r0 __asm__ ("0");                                   \
 353     register SItype r1 __asm__ ("1") = m0;                              \
 354                                                                         \
 355     __asm__ ("mr\t%%r0,%3"                                              \
 356              : "=r" (r0), "=r" (r1)                                     \
 357              : "r"  (r1),  "r" (m1));                                   \
 358     (xh) = r0; (xl) = r1;                                               \
 359   } while (0)
 360 #define sdiv_qrnnd(q, r, n1, n0, d) \
 361   do {                                                                  \
 362     register SItype r0 __asm__ ("0") = n0;                              \
 363     register SItype r1 __asm__ ("1") = n1;                              \
 364                                                                         \
 365     __asm__ ("dr\t%%r0,%3"                                              \
 366              : "=r" (r0), "=r" (r1)                                     \
 367              : "r" (r0), "r" (r1), "r" (d));                            \
 368     (q) = r0; (r) = r1;                                                 \
 369   } while (0)
 370 #endif /* __zarch__ */
 371 #endif
 372
  373 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
      /* 80x86, 32-bit word.  The {a|b} groups in the templates give the
         operand order for the two assembler dialects GCC can emit.
         add_ssaaaa: ah and al are tied to the outputs ("0", "1"), so the
         add / adc pair accumulates bl and bh into them in place.  */
  374 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  375   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
  376            : "=r" ((USItype) (sh)),                                     \
  377              "=&r" ((USItype) (sl))                                     \
  378            : "%0" ((USItype) (ah)),                                     \
  379              "g" ((USItype) (bh)),                                      \
  380              "%1" ((USItype) (al)),                                     \
  381              "g" ((USItype) (bl)))
      /* sub_ddmmss: same layout with sub / sbb; the tied inputs are not
         marked commutative.  */
  382 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  383   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
  384            : "=r" ((USItype) (sh)),                                     \
  385              "=&r" ((USItype) (sl))                                     \
  386            : "0" ((USItype) (ah)),                                      \
  387              "g" ((USItype) (bh)),                                      \
  388              "1" ((USItype) (al)),                                      \
  389              "g" ((USItype) (bl)))
      /* umul_ppmm: u is tied to the "a" operand and multiplied by v; the
         low product word comes back in the "a" operand (w0) and the high
         word in the "d" operand (w1).  */
  390 #define umul_ppmm(w1, w0, u, v) \
  391   __asm__ ("mul{l} %3"                                                  \
  392            : "=a" ((USItype) (w0)),                                     \
  393              "=d" ((USItype) (w1))                                      \
  394            : "%0" ((USItype) (u)),                                      \
  395              "rm" ((USItype) (v)))
      /* udiv_qrnnd: n0 is tied to the "a" operand and n1 to the "d"
         operand; after the div the quotient is read from "a" and the
         remainder from "d".  */
  396 #define udiv_qrnnd(q, r, n1, n0, dv) \
  397   __asm__ ("div{l} %4"                                                  \
  398            : "=a" ((USItype) (q)),                                      \
  399              "=d" ((USItype) (r))                                       \
  400            : "0" ((USItype) (n0)),                                      \
  401              "1" ((USItype) (n1)),                                      \
  402              "rm" ((USItype) (dv)))
  403 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
  404 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
  405 #define UMUL_TIME 40
  406 #define UDIV_TIME 40
  407 #endif /* 80x86 */
 408
  409 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
      /* x86 with a 64-bit word: the same macros as the 32-bit 80x86
         section, using the "q" instruction suffix, UDItype operands and
         "rme" instead of "g" for the second addends.
         add_ssaaaa: ah and al are tied to the outputs ("0", "1"), so the
         add / adc pair accumulates bl and bh into them in place.  */
  410 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  411   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
  412            : "=r" ((UDItype) (sh)),                                     \
  413              "=&r" ((UDItype) (sl))                                     \
  414            : "%0" ((UDItype) (ah)),                                     \
  415              "rme" ((UDItype) (bh)),                                    \
  416              "%1" ((UDItype) (al)),                                     \
  417              "rme" ((UDItype) (bl)))
      /* sub_ddmmss: same layout with sub / sbb; the tied inputs are not
         marked commutative.  */
  418 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  419   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
  420            : "=r" ((UDItype) (sh)),                                     \
  421              "=&r" ((UDItype) (sl))                                     \
  422            : "0" ((UDItype) (ah)),                                      \
  423              "rme" ((UDItype) (bh)),                                    \
  424              "1" ((UDItype) (al)),                                      \
  425              "rme" ((UDItype) (bl)))
      /* umul_ppmm: u is tied to the "a" operand and multiplied by v; the
         low product word comes back in "a" (w0), the high word in
         "d" (w1).  */
  426 #define umul_ppmm(w1, w0, u, v) \
  427   __asm__ ("mul{q} %3"                                                  \
  428            : "=a" ((UDItype) (w0)),                                     \
  429              "=d" ((UDItype) (w1))                                      \
  430            : "%0" ((UDItype) (u)),                                      \
  431              "rm" ((UDItype) (v)))
      /* udiv_qrnnd: n0 is tied to "a" and n1 to "d"; the quotient is read
         from "a" and the remainder from "d".  */
  432 #define udiv_qrnnd(q, r, n1, n0, dv) \
  433   __asm__ ("div{q} %4"                                                  \
  434            : "=a" ((UDItype) (q)),                                      \
  435              "=d" ((UDItype) (r))                                       \
  436            : "0" ((UDItype) (n0)),                                      \
  437              "1" ((UDItype) (n1)),                                      \
  438              "rm" ((UDItype) (dv)))
  439 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
  440 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
  441 #define UMUL_TIME 40
  442 #define UDIV_TIME 40
  443 #endif /* x86_64 */
 444
  445 #if defined (__i960__) && W_TYPE_SIZE == 32
      /* i960, 32-bit word.  Both macros are GCC statement expressions built
         around "emul", which writes a UDItype result.  umul_ppmm splits
         that result through a union whose first member half is __l (w0)
         and second is __h (w1).  */
  446 #define umul_ppmm(w1, w0, u, v) \
  447   ({union {UDItype __ll;                                                \
  448            struct {USItype __l, __h;} __i;                              \
  449           } __xx;                                                       \
  450   __asm__ ("emul        %2,%1,%0"                                       \
  451            : "=d" (__xx.__ll)                                           \
  452            : "%dI" ((USItype) (u)),                                     \
  453              "dI" ((USItype) (v)));                                     \
  454   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
      /* __umulsidi3 yields the whole UDItype product as the value of the
         statement expression.  */
  455 #define __umulsidi3(u, v) \
  456   ({UDItype __w;                                                        \
  457     __asm__ ("emul      %2,%1,%0"                                       \
  458              : "=d" (__w)                                               \
  459              : "%dI" ((USItype) (u)),                                   \
  460                "dI" ((USItype) (v)));                                   \
  461     __w; })
  462 #endif /* __i960__ */
 463
  464 #if defined (__ia64) && W_TYPE_SIZE == 64
  465 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
  466    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
  467    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
  468    register, which takes an extra cycle.  */
      /* Written in plain C: the low difference is computed into __x first
         and stored to (sl) last, after (al) < (bl) has selected whether the
         high difference borrows 1.  Note that al and bl are each expanded
         twice.  */
  469 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  470   do {                                                                  \
  471     UWtype __x;                                                         \
  472     __x = (al) - (bl);                                                  \
  473     if ((al) < (bl))                                                    \
  474       (sh) = (ah) - (bh) - 1;                                           \
  475     else                                                                \
  476       (sh) = (ah) - (bh);                                               \
  477     (sl) = __x;                                                         \
  478   } while (0)
  479
  480 /* Do both product parts in assembly, since that gives better code with
  481    all gcc versions.  Some callers will just use the upper part, and in
  482    that situation we waste an instruction, but not any cycles.  */
      /* xma.hu produces the high word (ph) and xma.l the low word (pl); all
         operands use the "f" constraint.  ph is early-clobber because it is
         written before the second instruction reads the inputs.  */
  483 #define umul_ppmm(ph, pl, m0, m1)                                       \
  484   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
  485            : "=&f" (ph), "=f" (pl)                                      \
  486            : "f" (m0), "f" (m1))
      /* _c accumulates the index of the highest set bit: a multiple of 8
         derived from the mux1/czx1.l step, then 4, 2 and 1 from the
         comparisons on the shifted-down value.  The result is
         W_TYPE_SIZE - 1 - _c.
         NOTE(review): the exact byte-search semantics of mux1 @rev and
         czx1.l are not visible here -- confirm against the ISA manual.  */
  487 #define count_leading_zeros(count, x)                                   \
  488   do {                                                                  \
  489     UWtype _x = (x), _y, _a, _c;                                        \
  490     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
  491     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
  492     _c = (_a - 1) << 3;                                                 \
  493     _x >>= _c;                                                          \
  494     if (_x >= 1 << 4)                                                   \
  495       _x >>= 4, _c += 4;                                                \
  496     if (_x >= 1 << 2)                                                   \
  497       _x >>= 2, _c += 2;                                                \
  498     _c += _x >> 1;                                                      \
  499     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  500   } while (0)
  501 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
  502    based, and we don't need a special case for x==0 here */
      /* (x - 1) & ~x keeps exactly the bits below the lowest set bit of x,
         so counting those bits with popcnt gives the number of trailing
         zeros.  */
  503 #define count_trailing_zeros(count, x)                                  \
  504   do {                                                                  \
  505     UWtype __ctz_x = (x);                                               \
  506     __asm__ ("popcnt %0 = %1"                                           \
  507              : "=r" (count)                                             \
  508              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  509   } while (0)
  510 #define UMUL_TIME 14
  511 #endif
 512
  513 #if defined (__M32R__) && W_TYPE_SIZE == 32
      /* M32R, 32-bit word.  ah and al are tied to the outputs ("0", "1");
         after the cmp, the first addx accumulates bl into the low word and
         the second accumulates bh into the high word.  The condition bit is
         declared clobbered ("cbit").  */
  514 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  515   /* The cmp clears the condition bit.  */ \
  516   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
  517            : "=r" ((USItype) (sh)),                                     \
  518              "=&r" ((USItype) (sl))                                     \
  519            : "0" ((USItype) (ah)),                                      \
  520              "r" ((USItype) (bh)),                                      \
  521              "1" ((USItype) (al)),                                      \
  522              "r" ((USItype) (bl))                                       \
  523            : "cbit")
      /* sub_ddmmss: identical structure using subx in place of addx.  */
  524 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  525   /* The cmp clears the condition bit.  */ \
  526   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
  527            : "=r" ((USItype) (sh)),                                     \
  528              "=&r" ((USItype) (sl))                                     \
  529            : "0" ((USItype) (ah)),                                      \
  530              "r" ((USItype) (bh)),                                      \
  531              "1" ((USItype) (al)),                                      \
  532              "r" ((USItype) (bl))                                       \
  533            : "cbit")
  534 #endif /* __M32R__ */
 535
 536 #if defined (__mc68000__) && W_TYPE_SIZE == 32
  537 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
        /* ah/al are tied to the outputs: add bl into the low word (%5 into  \
           %1), then addx bh into the high word (%3 into %0).  */ \
  538   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
  539            : "=d" ((USItype) (sh)),                                     \
  540              "=&d" ((USItype) (sl))                                     \
  541            : "%0" ((USItype) (ah)),                                     \
  542              "d" ((USItype) (bh)),                                      \
  543              "%1" ((USItype) (al)),                                     \
  544              "g" ((USItype) (bl)))
      /* sub_ddmmss: same layout using sub / subx; the tied inputs are not
         marked commutative.  */
  545 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  546   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
  547            : "=d" ((USItype) (sh)),                                     \
  548              "=&d" ((USItype) (sl))                                     \
  549            : "0" ((USItype) (ah)),                                      \
  550              "d" ((USItype) (bh)),                                      \
  551              "1" ((USItype) (al)),                                      \
  552              "g" ((USItype) (bl)))
 553
 554 /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r;
        the '060 is excluded by the test below.  */
 555 #if (defined (__mc68020__) && !defined (__mc68060__))
 556 #define umul_ppmm(w1, w0, u, v) \
       /* Unsigned 32x32->64 multiply of u by v.  %0 = w0 (tied to   */ \
       /* u, commutative with v) receives the low word, %1 = w1 the  */ \
       /* high word; %3 = v may be a data register, memory or an     */ \
       /* immediate.                                                 */ \
 557   __asm__ ("mulu%.l %3,%1:%0"                                           \
 558            : "=d" ((USItype) (w0)),                                     \
 559              "=d" ((USItype) (w1))                                      \
 560            : "%0" ((USItype) (u)),                                      \
 561              "dmi" ((USItype) (v)))
 562 #define UMUL_TIME 45
 563 #define udiv_qrnnd(q, r, n1, n0, d) \
       /* Unsigned 64/32 divide.  On entry the register pair %1:%0   */ \
       /* holds the dividend n1:n0 (both inputs are tied to the      */ \
       /* outputs); on exit %0 = q is the quotient and %1 = r the    */ \
       /* remainder.  %4 = d is the divisor.                         */ \
 564   __asm__ ("divu%.l %4,%1:%0"                                           \
 565            : "=d" ((USItype) (q)),                                      \
 566              "=d" ((USItype) (r))                                       \
 567            : "0" ((USItype) (n0)),                                      \
 568              "1" ((USItype) (n1)),                                      \
 569              "dmi" ((USItype) (d)))
 570 #define UDIV_TIME 90
 571 #define sdiv_qrnnd(q, r, n1, n0, d) \
       /* Signed counterpart of udiv_qrnnd: identical operand layout */ \
       /* but uses divs instead of divu.                             */ \
 572   __asm__ ("divs%.l %4,%1:%0"                                           \
 573            : "=d" ((USItype) (q)),                                      \
 574              "=d" ((USItype) (r))                                       \
 575            : "0" ((USItype) (n0)),                                      \
 576              "1" ((USItype) (n1)),                                      \
 577              "dmi" ((USItype) (d)))
 578
 579 #elif defined (__mcoldfire__) /* not mc68020 */
 580
 581 #define umul_ppmm(xh, xl, a, b) \
       /* ColdFire 32x32->64 unsigned multiply built from four       */ \
       /* 16x16 mulu partial products.  a and b are copied to d0/d1  */ \
       /* and split into halves; d4 = lo(a)*lo(b), d2 = lo(a)*hi(b), */ \
       /* d3 = hi(a)*lo(b), d1 = hi(a)*hi(b).  The two middle        */ \
       /* products plus the upper half of d4 are summed in d2; a     */ \
       /* carry out of that sum adds 65536 to d1.  The low result    */ \
       /* word is stored to %1 = xl and the high word to %0 = xh.    */ \
       /* d0-d4 are used as scratch and listed as clobbers.  The low */ \
       /* half of d0 is cleared with clr.w in this variant.          */ \
 582   __asm__ ("| Inlined umul_ppmm\n"                                      \
 583            "    move%.l %2,%/d0\n"                                      \
 584            "    move%.l %3,%/d1\n"                                      \
 585            "    move%.l %/d0,%/d2\n"                                    \
 586            "    swap    %/d0\n"                                         \
 587            "    move%.l %/d1,%/d3\n"                                    \
 588            "    swap    %/d1\n"                                         \
 589            "    move%.w %/d2,%/d4\n"                                    \
 590            "    mulu    %/d3,%/d4\n"                                    \
 591            "    mulu    %/d1,%/d2\n"                                    \
 592            "    mulu    %/d0,%/d3\n"                                    \
 593            "    mulu    %/d0,%/d1\n"                                    \
 594            "    move%.l %/d4,%/d0\n"                                    \
 595            "    clr%.w  %/d0\n"                                         \
 596            "    swap    %/d0\n"                                         \
 597            "    add%.l  %/d0,%/d2\n"                                    \
 598            "    add%.l  %/d3,%/d2\n"                                    \
 599            "    jcc     1f\n"                                           \
 600            "    add%.l  %#65536,%/d1\n"                                 \
 601            "1:  swap    %/d2\n"                                         \
 602            "    moveq   %#0,%/d0\n"                                     \
 603            "    move%.w %/d2,%/d0\n"                                    \
 604            "    move%.w %/d4,%/d2\n"                                    \
 605            "    move%.l %/d2,%1\n"                                      \
 606            "    add%.l  %/d1,%/d0\n"                                    \
 607            "    move%.l %/d0,%0"                                        \
 608            : "=g" ((USItype) (xh)),                                     \
 609              "=g" ((USItype) (xl))                                      \
 610            : "g" ((USItype) (a)),                                       \
 611              "g" ((USItype) (b))                                        \
 612            : "d0", "d1", "d2", "d3", "d4")
 613 #define UMUL_TIME 100
 614 #define UDIV_TIME 400
 615 #else /* not ColdFire */
 616 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
     /* Plain 68000 umul_ppmm: a 32x32->64 unsigned multiply built from four
        16x16 mulu partial products.  a and b are copied to d0/d1 and split
        into halves; d4 = lo(a)*lo(b), d2 = lo(a)*hi(b), d3 = hi(a)*lo(b),
        d1 = hi(a)*hi(b).  The two middle products plus the upper half of d4
        are summed in d2; a carry out of that sum adds 65536 to d1.  The low
        result word is stored to %1 = xl and the high word to %0 = xh.
        d0-d4 are scratch and listed as clobbers.  This variant clears the
        low half of d0 with eor.w where the ColdFire variant uses clr.w.  */
 617 #define umul_ppmm(xh, xl, a, b) \
 618   __asm__ ("| Inlined umul_ppmm\n"                                      \
 619            "    move%.l %2,%/d0\n"                                      \
 620            "    move%.l %3,%/d1\n"                                      \
 621            "    move%.l %/d0,%/d2\n"                                    \
 622            "    swap    %/d0\n"                                         \
 623            "    move%.l %/d1,%/d3\n"                                    \
 624            "    swap    %/d1\n"                                         \
 625            "    move%.w %/d2,%/d4\n"                                    \
 626            "    mulu    %/d3,%/d4\n"                                    \
 627            "    mulu    %/d1,%/d2\n"                                    \
 628            "    mulu    %/d0,%/d3\n"                                    \
 629            "    mulu    %/d0,%/d1\n"                                    \
 630            "    move%.l %/d4,%/d0\n"                                    \
 631            "    eor%.w  %/d0,%/d0\n"                                    \
 632            "    swap    %/d0\n"                                         \
 633            "    add%.l  %/d0,%/d2\n"                                    \
 634            "    add%.l  %/d3,%/d2\n"                                    \
 635            "    jcc     1f\n"                                           \
 636            "    add%.l  %#65536,%/d1\n"                                 \
 637            "1:  swap    %/d2\n"                                         \
 638            "    moveq   %#0,%/d0\n"                                     \
 639            "    move%.w %/d2,%/d0\n"                                    \
 640            "    move%.w %/d4,%/d2\n"                                    \
 641            "    move%.l %/d2,%1\n"                                      \
 642            "    add%.l  %/d1,%/d0\n"                                    \
 643            "    move%.l %/d0,%0"                                        \
 644            : "=g" ((USItype) (xh)),                                     \
 645              "=g" ((USItype) (xl))                                      \
 646            : "g" ((USItype) (a)),                                       \
 647              "g" ((USItype) (b))                                        \
 648            : "d0", "d1", "d2", "d3", "d4")
 649 #define UMUL_TIME 100
 650 #define UDIV_TIME 400
 651
 652 #endif /* not mc68020 */
 653
 654 /* The '020, '030, '040 and '060 have bitfield insns.
 655    cpu32 disguises as a 68020, but lacks them.  */
 656 #if defined (__mc68020__) && !defined (__mcpu32__)
     /* count_leading_zeros via bfffo on x.  Operand 2 is the constant 0 and
        is substituted for both the bit-field offset and width.
        NOTE(review): a width of 0 presumably selects the full 32 bits --
        confirm against the 68020 manual.  */
 657 #define count_leading_zeros(count, x) \
 658   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 659            : "=d" ((USItype) (count))                                   \
 660            : "od" ((USItype) (x)), "n" (0))
 661 /* Some ColdFire architectures have a ff1 instruction supported via
 662    __builtin_clz. */
 663 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
     /* Here the count comes straight from the compiler builtin;
        COUNT_LEADING_ZEROS_0 records 32 as the result for x == 0
        (NOTE(review): this relies on the target's behaviour, since the
        builtin itself does not define a result for 0).  */
 664 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 665 #define COUNT_LEADING_ZEROS_0 32
 666 #endif
 667 #endif /* mc68000 */
 668
 669 #if defined (__m88000__) && W_TYPE_SIZE == 32
 670 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
       /* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  %1 = sl is     */ \
       /* al + bl (%4 + %5) with carry out; %0 = sh is ah + bh       */ \
       /* (%2 + %3) with carry in.  sl is early-clobber because it   */ \
       /* is written before the high-word inputs are read.  The "J"  */ \
       /* constraint with the %r modifier presumably lets a constant */ \
       /* zero be printed as the zero register.                      */ \
 671   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 672            : "=r" ((USItype) (sh)),                                     \
 673              "=&r" ((USItype) (sl))                                     \
 674            : "%rJ" ((USItype) (ah)),                                    \
 675              "rJ" ((USItype) (bh)),                                     \
 676              "%rJ" ((USItype) (al)),                                    \
 677              "rJ" ((USItype) (bl)))
 678 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
       /* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  Same      */ \
       /* operand numbering as add_ssaaaa; subu.co produces the      */ \
       /* borrow that subu.ci consumes for the high words.           */ \
 679   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 680            : "=r" ((USItype) (sh)),                                     \
 681              "=&r" ((USItype) (sl))                                     \
 682            : "rJ" ((USItype) (ah)),                                     \
 683              "rJ" ((USItype) (bh)),                                     \
 684              "rJ" ((USItype) (al)),                                     \
 685              "rJ" ((USItype) (bl)))
 686 #define count_leading_zeros(count, x) \
       /* Runs ff1 on x and XORs the result with 31.  Presumably ff1 */ \
       /* yields the index of the most significant set bit, so the   */ \
       /* XOR turns that index into a leading-zero count.            */ \
       /* NOTE(review): the value 63 below would follow if ff1       */ \
       /* returns 32 for x == 0 (32 ^ 31 == 63) -- confirm.          */ \
 687   do {                                                                  \
 688     USItype __cbtmp;                                                    \
 689     __asm__ ("ff1 %0,%1"                                                \
 690              : "=r" (__cbtmp)                                           \
 691              : "r" ((USItype) (x)));                                    \
 692     (count) = __cbtmp ^ 31;                                             \
 693   } while (0)
 694 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 695 #if defined (__mc88110__)
     /* 88110 only: 32x32->64 unsigned multiply.  mulu.d writes the 64-bit
        product into __xx.__ll; the union then exposes it as two words with
        the high word first (__h) and the low word second (__l).  */
 696 #define umul_ppmm(wh, wl, u, v) \
 697   do {                                                                  \
 698     union {UDItype __ll;                                                \
 699            struct {USItype __h, __l;} __i;                              \
 700           } __xx;                                                       \
 701     __asm__ ("mulu.d    %0,%1,%2"                                       \
 702              : "=r" (__xx.__ll)                                         \
 703              : "r" ((USItype) (u)),                                     \
 704                "r" ((USItype) (v)));                                    \
 705     (wh) = __xx.__i.__h;                                                \
 706     (wl) = __xx.__i.__l;                                                \
 707   } while (0)
     /* 88110 only: 64/32 unsigned divide.  n1:n0 is packed into a 64-bit
        value, divu.d produces only the quotient, and the remainder is
        reconstructed as n0 - q * d in word arithmetic.  Note that n0 and d
        are each expanded twice, so arguments with side effects are
        evaluated more than once.  */
 708 #define udiv_qrnnd(q, r, n1, n0, d) \
 709   ({union {UDItype __ll;                                                \
 710            struct {USItype __h, __l;} __i;                              \
 711           } __xx;                                                       \
 712   USItype __q;                                                          \
 713   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 714   __asm__ ("divu.d %0,%1,%2"                                            \
 715            : "=r" (__q)                                                 \
 716            : "r" (__xx.__ll),                                           \
 717              "r" ((USItype) (d)));                                      \
 718   (r) = (n0) - __q * (d); (q) = __q; })
 719 #define UMUL_TIME 5
 720 #define UDIV_TIME 25
 721 #else
     /* Other 88000 parts: no umul_ppmm/udiv_qrnnd are defined in this
        section, only the timing figures.  */
 722 #define UMUL_TIME 17
 723 #define UDIV_TIME 150
 724 #endif /* __mc88110__ */
 725 #endif /* __m88000__ */
 726
 727 #if defined (__mn10300__)
 728 # if defined (__AM33__)
     /* AM33: leading zeros come from the compiler builtin, and the
        four-operand mulu/mul forms write the low product word to %0 = w0
        and the high word to %1 = w1, with u and v in general registers.  */
 729 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 730 #  define umul_ppmm(w1, w0, u, v)               \
 731     asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 732 #  define smul_ppmm(w1, w0, u, v)               \
 733     asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 734 # else
     /* Non-AM33: two-operand multiply.  w0 is a data register tied to u
        and receives the low word; w1 uses the "z" constraint
        (NOTE(review): presumably the register that receives the high
        half of the product -- confirm against the mn10300 constraint
        documentation).  The template issues two nops before the
        multiply.  */
 735 #  define umul_ppmm(w1, w0, u, v)               \
 736     asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 737 #  define smul_ppmm(w1, w0, u, v)               \
 738     asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 739 # endif
 740 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
 741   do {                                          \
 742     DWunion __s, __a, __b;                      \
 743     __a.s.low = (al); __a.s.high = (ah);        \
 744     __b.s.low = (bl); __b.s.high = (bh);        \
 745     __s.ll = __a.ll + __b.ll;                   \
 746     (sl) = __s.s.low; (sh) = __s.s.high;        \
 747   } while (0)
 748 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
 749   do {                                          \
 750     DWunion __s, __a, __b;                      \
 751     __a.s.low = (al); __a.s.high = (ah);        \
 752     __b.s.low = (bl); __b.s.high = (bh);        \
 753     __s.ll = __a.ll - __b.ll;                   \
 754     (sl) = __s.s.low; (sh) = __s.s.high;        \
 755   } while (0)
 756 # define udiv_qrnnd(q, r, nh, nl, d)            \
       /* Unsigned divide of nh:nl by d.  %0 = q is tied to nl and   */ \
       /* %1 = r (constraint "z") is tied to nh, so the dividend is  */ \
       /* preloaded into the two output registers; %2 = d.           */ \
 757   asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 758 # define sdiv_qrnnd(q, r, nh, nl, d)            \
       /* Signed counterpart: same operand layout, div for divu.     */ \
 759   asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 760 # define UMUL_TIME 3
 761 # define UDIV_TIME 38
 762 #endif
 763
 764 #if defined (__mips__) && W_TYPE_SIZE == 32
 765 #define umul_ppmm(w1, w0, u, v)                                         \
 766   do {                                                                  \
 767     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
 768     (w1) = (USItype) (__x >> 32);                                       \
 769     (w0) = (USItype) (__x);                                             \
 770   } while (0)
 771 #define UMUL_TIME 10
 772 #define UDIV_TIME 100
 773
     /* Only for __mips equal to 32 or 64 and not MIPS16 code: take the
        leading-zero count from the compiler builtin, and record 32 as the
        count for a zero argument.  */
 774 #if (__mips == 32 || __mips == 64) && ! __mips16
 775 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 776 #define COUNT_LEADING_ZEROS_0 32
 777 #endif
 778 #endif /* __mips__ */
 779
 780 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 781 #define umul_ppmm(w1, w0, u, v) \
       /* Unsigned 32x32->64 multiply.  meid leaves the product in   */ \
       /* the 64-bit __xx.__ll, which is tied to u on input; the     */ \
       /* union exposes it as low word (__l) then high word (__h),   */ \
       /* which are copied to w0 and w1 respectively.                */ \
 782   ({union {UDItype __ll;                                                \
 783            struct {USItype __l, __h;} __i;                              \
 784           } __xx;                                                       \
 785   __asm__ ("meid %2,%0"                                                 \
 786            : "=g" (__xx.__ll)                                           \
 787            : "%0" ((USItype) (u)),                                      \
 788              "g" ((USItype) (v)));                                      \
 789   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
     /* Same meid multiply, but yielding the 64-bit product as the value of
        the statement expression instead of splitting it into words.  */
 790 #define __umulsidi3(u, v) \
 791   ({UDItype __w;                                                        \
 792     __asm__ ("meid %2,%0"                                               \
 793              : "=g" (__w)                                               \
 794              : "%0" ((USItype) (u)),                                    \
 795                "g" ((USItype) (v)));                                    \
 796     __w; })
 797 #define udiv_qrnnd(q, r, n1, n0, d) \
       /* Unsigned 64/32 divide.  n1:n0 is packed into __xx (low     */ \
       /* word first) and passed to deid as an in/out operand; on    */ \
       /* return the low word is taken as the remainder and the high */ \
       /* word as the quotient.                                      */ \
 798   ({union {UDItype __ll;                                                \
 799            struct {USItype __l, __h;} __i;                              \
 800           } __xx;                                                       \
 801   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 802   __asm__ ("deid %2,%0"                                                 \
 803            : "=g" (__xx.__ll)                                           \
 804            : "0" (__xx.__ll),                                           \
 805              "g" ((USItype) (d)));                                      \
 806   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 807 #define count_trailing_zeros(count,x) \
       /* Runs ffsd on x.  count is the in/out operand %0 and is     */ \
       /* preloaded with 0 through the matching "0" constraint       */ \
       /* (presumably the bit offset at which the search starts).    */ \
 808   do {                                                                  \
 809     __asm__ ("ffsd     %2,%0"                                           \
 810             : "=r" ((USItype) (count))                                  \
 811             : "0" ((USItype) 0),                                        \
 812               "r" ((USItype) (x)));                                     \
 813   } while (0)
 814 #endif /* __ns32000__ */
 815
 816 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 817    system vendor compilers.
 818    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 819    enough, since that hits ARM and m68k too.  */
 820 #if (defined (_ARCH_PPC)        /* AIX */                               \
 821      || defined (_ARCH_PWR)     /* AIX */                               \
 822      || defined (_ARCH_COM)     /* AIX */                               \
 823      || defined (__powerpc__)   /* gcc */                               \
 824      || defined (__POWERPC__)   /* BEOS */                              \
 825      || defined (__ppc__)       /* Darwin */                            \
 826      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 827      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 828          && CPU_FAMILY == PPC)                                                \
 829      ) && W_TYPE_SIZE == 32
 830 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
       /* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  The low words  */ \
       /* are always added with a carry-out (bl may be an "I"        */ \
       /* immediate).  The high-word step is specialized when bh is  */ \
       /* a compile-time constant: 0 uses addze, all ones uses       */ \
       /* addme (neither needs a bh operand); otherwise adde is      */ \
       /* used.  Each {x|y} pair gives two spellings of one          */ \
       /* instruction (presumably POWER and PowerPC mnemonics).      */ \
 831   do {                                                                  \
 832     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 833       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 834              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 835     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 836       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 837              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 838     else                                                                \
 839       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 840              : "=r" (sh), "=&r" (sl)                                    \
 841              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 842   } while (0)
 843 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
       /* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  The low   */ \
       /* words are always subtracted with a carry-out (al may be an */ \
       /* "I" immediate).  The high-word step is specialized when ah */ \
       /* or bh is a compile-time constant 0 or all ones, so that a  */ \
       /* one-operand carry instruction (subfze, subfme, addme,      */ \
       /* addze) suffices; the general case uses subfe.              */ \
 844   do {                                                                  \
 845     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 846       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 847                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 848     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 849       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 850                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 851     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 852       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 853                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 854     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 855       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 856                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 857     else                                                                \
 858       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 859                : "=r" (sh), "=&r" (sl)                                  \
 860                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 861   } while (0)
 862 #define count_leading_zeros(count, x) \
       /* Leading-zero count of the 32-bit x via cntlz/cntlzw.  The  */ \
       /* define below records 32 as the count for x == 0.           */ \
 863   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 864 #define COUNT_LEADING_ZEROS_0 32
 865 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 866   || defined (__ppc__)                                                    \
 867   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 868   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 869          && CPU_FAMILY == PPC)
 870 #define umul_ppmm(ph, pl, m0, m1) \
 871   do {                                                                  \
 872     USItype __m0 = (m0), __m1 = (m1);                                   \
 873     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
 874     (pl) = __m0 * __m1;                                                 \
 875   } while (0)
 876 #define UMUL_TIME 15
 877 #define smul_ppmm(ph, pl, m0, m1) \
 878   do {                                                                  \
 879     SItype __m0 = (m0), __m1 = (m1);                                    \
 880     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
 881     (pl) = __m0 * __m1;                                                 \
 882   } while (0)
 883 #define SMUL_TIME 14
 884 #define UDIV_TIME 120
 885 #elif defined (_ARCH_PWR)
 886 #define UMUL_TIME 8
     /* POWER (_ARCH_PWR) signed multiply: %0 = xh is a general register
        and %1 = xl uses the "q" constraint (NOTE(review): presumably the
        MQ register, which would receive the low product word -- confirm
        against the rs6000 constraint documentation).  */
 887 #define smul_ppmm(xh, xl, m0, m1) \
 888   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 889 #define SMUL_TIME 4
     /* POWER signed divide of nh:nl by d: nl is preloaded into the "q"
        output register through the matching "1" constraint; q comes back
        in %0 and r in the "q" register.  */
 890 #define sdiv_qrnnd(q, r, nh, nl, d) \
 891   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 892 #define UDIV_TIME 100
 893 #endif
 894 #endif /* 32-bit POWER architecture variants.  */
 895
 896 /* We should test _IBMR2 here when we add assembly support for the system
 897    vendor compilers.  */
 898 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 899 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
       /* 64-bit-word two-word add: (sh,sl) = (ah,al) + (bh,bl).     */ \
       /* The low words are always added with a carry-out (bl may be */ \
       /* an "I" immediate).  The high-word step is specialized when */ \
       /* bh is a compile-time constant: 0 uses addze, all ones uses */ \
       /* addme; otherwise adde is used.                             */ \
 900   do {                                                                  \
 901     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 902       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 903              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 904     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 905       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 906              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 907     else                                                                \
 908       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 909              : "=r" (sh), "=&r" (sl)                                    \
 910              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 911   } while (0)
 912 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
       /* 64-bit-word two-word subtract:                             */ \
       /* (sh,sl) = (ah,al) - (bh,bl).  The low words are always     */ \
       /* subtracted with a carry-out (al may be an "I" immediate).  */ \
       /* The high-word step is specialized when ah or bh is a       */ \
       /* compile-time constant 0 or all ones (subfze, subfme,       */ \
       /* addme, addze); the general case uses subfe.                */ \
 913   do {                                                                  \
 914     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 915       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 916                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 917     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 918       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 919                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 920     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 921       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 922                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 923     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 924       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 925                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 926     else                                                                \
 927       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 928                : "=r" (sh), "=&r" (sl)                                  \
 929                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 930   } while (0)
 931 #define count_leading_zeros(count, x) \
       /* Leading-zero count of the 64-bit x via cntlzd.  The define */ \
       /* below records 64 as the count for x == 0.                  */ \
 932   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 933 #define COUNT_LEADING_ZEROS_0 64
 934 #define umul_ppmm(ph, pl, m0, m1) \
 935   do {                                                                  \
 936     UDItype __m0 = (m0), __m1 = (m1);                                   \
 937     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
 938     (pl) = __m0 * __m1;                                                 \
 939   } while (0)
 940 #define UMUL_TIME 15
 941 #define smul_ppmm(ph, pl, m0, m1) \
 942   do {                                                                  \
 943     DItype __m0 = (m0), __m1 = (m1);                                    \
 944     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
 945     (pl) = __m0 * __m1;                                                 \
 946   } while (0)
 947 #define SMUL_TIME 14  /* ??? */
 948 #define UDIV_TIME 120 /* ??? */
 949 #endif /* 64-bit PowerPC.  */
 950
 951 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 952 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
       /* Two-word add: (sh,sl) = (ah,al) + (bh,bl).  %0 = sh (tied  */ \
       /* to ah), %1 = sl (tied to al), %3 = bh, %5 = bl.  "a" adds  */ \
       /* the low words; "ae" then adds the high words (presumably   */ \
       /* including the carry from the first add).                   */ \
 953   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 954            : "=r" ((USItype) (sh)),                                     \
 955              "=&r" ((USItype) (sl))                                     \
 956            : "%0" ((USItype) (ah)),                                     \
 957              "r" ((USItype) (bh)),                                      \
 958              "%1" ((USItype) (al)),                                     \
 959              "r" ((USItype) (bl)))
 960 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
       /* Two-word subtract: (sh,sl) = (ah,al) - (bh,bl).  Same      */ \
       /* operand map as add_ssaaaa; "s" handles the low words and   */ \
       /* "se" the high words (presumably with the borrow).          */ \
 961   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 962            : "=r" ((USItype) (sh)),                                     \
 963              "=&r" ((USItype) (sl))                                     \
 964            : "0" ((USItype) (ah)),                                      \
 965              "r" ((USItype) (bh)),                                      \
 966              "1" ((USItype) (al)),                                      \
 967              "r" ((USItype) (bl)))
 968 #define umul_ppmm(ph, pl, m0, m1) \
       /* Unsigned 32x32->64 multiply.  r2 is cleared, __m0 is moved */ \
       /* to r10 with mts, and sixteen "m r2,%3" steps are issued    */ \
       /* against __m1 (NOTE(review): presumably each step retires   */ \
       /* two multiplier bits -- confirm against the ROMP manual).   */ \
       /* The high word is then copied from r2 to %0 = ph and the    */ \
       /* low word read back from r10 into %1 = pl.  The C statement */ \
       /* after the asm adds __m1 to ph when __m0 has its top bit    */ \
       /* set and __m0 when __m1 has its top bit set, i.e. it        */ \
       /* converts a signed high word into the unsigned one.         */ \
 969   do {                                                                  \
 970     USItype __m0 = (m0), __m1 = (m1);                                   \
 971     __asm__ (                                                           \
 972        "s       r2,r2\n"                                                \
 973 "       mts     r10,%2\n"                                               \
 974 "       m       r2,%3\n"                                                \
 975 "       m       r2,%3\n"                                                \
 976 "       m       r2,%3\n"                                                \
 977 "       m       r2,%3\n"                                                \
 978 "       m       r2,%3\n"                                                \
 979 "       m       r2,%3\n"                                                \
 980 "       m       r2,%3\n"                                                \
 981 "       m       r2,%3\n"                                                \
 982 "       m       r2,%3\n"                                                \
 983 "       m       r2,%3\n"                                                \
 984 "       m       r2,%3\n"                                                \
 985 "       m       r2,%3\n"                                                \
 986 "       m       r2,%3\n"                                                \
 987 "       m       r2,%3\n"                                                \
 988 "       m       r2,%3\n"                                                \
 989 "       m       r2,%3\n"                                                \
 990 "       cas     %0,r2,r0\n"                                             \
 991 "       mfs     r10,%1"                                                 \
 992              : "=r" ((USItype) (ph)),                                   \
 993                "=r" ((USItype) (pl))                                    \
 994              : "%r" (__m0),                                             \
 995                 "r" (__m1)                                              \
 996              : "r2");                                                   \
 997     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
 998              + (((SItype) __m1 >> 31) & __m0));                         \
 999   } while (0)
1000 #define UMUL_TIME 20
1001 #define UDIV_TIME 200
1002 #define count_leading_zeros(count, x) \
       /* Leading-zero count in two cases: when x >= 0x10000 the clz */ \
       /* instruction is applied to x >> 16; otherwise it is applied */ \
       /* to x itself and 16 is added.  This suggests clz examines   */ \
       /* only the low 16 bits of its operand (NOTE(review): confirm */ \
       /* against the ROMP manual).  x is expanded more than once.   */ \
1003   do {                                                                  \
1004     if ((x) >= 0x10000)                                                 \
1005       __asm__ ("clz     %0,%1"                                          \
1006                : "=r" ((USItype) (count))                               \
1007                : "r" ((USItype) (x) >> 16));                            \
1008     else                                                                \
1009       {                                                                 \
1010         __asm__ ("clz   %0,%1"                                          \
1011                  : "=r" ((USItype) (count))                             \
1012                  : "r" ((USItype) (x)));                                        \
1013         (count) += 16;                                                  \
1014       }                                                                 \
1015   } while (0)
1016 #endif
1017
1018 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1019 #ifndef __sh1__
1020 #define umul_ppmm(w1, w0, u, v) \
       /* Unsigned 32x32->64 multiply (not available on SH1, see the */ \
       /* enclosing #ifndef).  dmulu.l multiplies u by v; the result */ \
       /* is then fetched with sts: macl into %1 = w0 (low word) and */ \
       /* mach into %0 = w1 (high word).  Both MAC registers are     */ \
       /* declared clobbered.                                        */ \
1021   __asm__ (                                                             \
1022        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1023            : "=r<" ((USItype)(w1)),                                     \
1024              "=r<" ((USItype)(w0))                                      \
1025            : "r" ((USItype)(u)),                                        \
1026              "r" ((USItype)(v))                                         \
1027            : "macl", "mach")
1028 #define UMUL_TIME 5
1029 #endif
1030
1031 /* This is the same algorithm as __udiv_qrnnd_c.  */
     /* udiv_qrnnd divides n1:n0 by d by calling the hidden out-of-line
        helper __udiv_qrnnd_16 twice (the two "jsr @%5").  The register
        roles are given by the comment inside the macro: n1 and the running
        remainder live in r0 (operand %1 = r, constraint "z"), n0 in r4, d
        in r5, d1 in r6, and r1 carries the partial quotient that each call
        returns.  After the first call r1 is word-swapped into %0 = q; after
        the second it is OR-ed in.  NOTE(review): the instruction following
        each jsr presumably executes in the branch delay slot -- confirm.
        UDIV_NEEDS_NORMALIZATION is set to 1 for this implementation.  */
1032 #define UDIV_NEEDS_NORMALIZATION 1
1033
1034 #define udiv_qrnnd(q, r, n1, n0, d) \
1035   do {                                                                  \
1036     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1037                         __attribute__ ((visibility ("hidden")));        \
1038     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1039     __asm__ (                                                           \
1040         "mov%M4 %4,r5\n"                                                \
1041 "       swap.w %3,r4\n"                                                 \
1042 "       swap.w r5,r6\n"                                                 \
1043 "       jsr @%5\n"                                                      \
1044 "       shll16 r6\n"                                                    \
1045 "       swap.w r4,r4\n"                                                 \
1046 "       jsr @%5\n"                                                      \
1047 "       swap.w r1,%0\n"                                                 \
1048 "       or r1,%0"                                                       \
1049         : "=r" (q), "=&z" (r)                                           \
1050         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1051         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1052   } while (0)
1053
1054 #define UDIV_TIME 80
1055
/* sub_ddmmss (sh, sl, ah, al, bh, bl): SH variant.  "clrt" is followed by
   two "subc" instructions: the first subtracts BL from the register that
   holds AL/SL, the second subtracts BH from the register that holds AH/SH
   (presumably chaining the borrow through the T bit).  AH and AL are tied
   to the outputs through matching constraints, and "t" is clobbered.  */
1056 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1057   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1058            : "=r" (sh), "=r" (sl)                                       \
1059            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1060
1061 #endif /* __sh__ */
1062
1063 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* __umulsidi3 (u, v): SHmedia variant, written in plain C.  U is converted
   to USItype and then to UDItype before being multiplied by V converted to
   USItype, so the multiplication is carried out in UDItype.  Both
   arguments are parenthesized so that an operator expression passed as U
   or V is converted as a whole rather than only its first operand.  */
1064 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros (count, x): SHmedia variant.  X is converted to
   USItype and widened into a UDItype temporary, "nsb" is applied to that
   temporary, and COUNT receives the instruction's result minus 31.
   COUNT_LEADING_ZEROS_0 is defined as 32 (presumably the result for a zero
   argument).  X is evaluated once.  */
1065 #define count_leading_zeros(count, x) \
1066   do                                                                    \
1067     {                                                                   \
1068       UDItype x_ = (USItype)(x);                                        \
1069       SItype c_;                                                        \
1070                                                                         \
1071       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1072       (count) = c_ - 31;                                                \
1073     }                                                                   \
1074   while (0)
1075 #define COUNT_LEADING_ZEROS_0 32
1076 #endif
1077
1078 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1079     && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): 32-bit SPARC variant.  "addcc"
   forms SL from AL and BL, then "addx" forms SH from AH and BH (presumably
   adding the carry left by addcc).  SL is earlyclobber because it is
   written before AH and BH are read.  The "%" on AH and AL marks each
   operand pair as commutative; "J"/"I" admit the constants the back end
   accepts, with the %r modifier presumably printing a zero as %g0 -- TODO
   confirm.  __CLOBBER_CC is not defined in this part of the file.  */
1080 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1081   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1082            : "=r" ((USItype) (sh)),                                     \
1083              "=&r" ((USItype) (sl))                                     \
1084            : "%rJ" ((USItype) (ah)),                                    \
1085              "rI" ((USItype) (bh)),                                     \
1086              "%rJ" ((USItype) (al)),                                    \
1087              "rI" ((USItype) (bl))                                      \
1088            __CLOBBER_CC)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): 32-bit SPARC variant.  "subcc"
   forms SL from AL and BL, then "subx" forms SH from AH and BH (presumably
   subtracting the borrow left by subcc).  Unlike the addition macro the
   operands carry no "%" marker, the operand order being significant.  SL
   is earlyclobber because it is written before AH and BH are read.  */
1089 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1090   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1091            : "=r" ((USItype) (sh)),                                     \
1092              "=&r" ((USItype) (sl))                                     \
1093            : "rJ" ((USItype) (ah)),                                     \
1094              "rI" ((USItype) (bh)),                                     \
1095              "rJ" ((USItype) (al)),                                     \
1096              "rI" ((USItype) (bl))                                      \
1097            __CLOBBER_CC)
1098 #if defined (__sparc_v8__)
/* umul_ppmm (w1, w0, u, v): SPARC v8 variant.  "umul" of U and V writes
   W0, and the following "rd %y" copies the Y register into W1.  */
1099 #define umul_ppmm(w1, w0, u, v) \
1100   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1101            : "=r" ((USItype) (w1)),                                     \
1102              "=r" ((USItype) (w0))                                      \
1103            : "r" ((USItype) (u)),                                       \
1104              "r" ((USItype) (v)))
/* udiv_qrnnd (__q, __r, __n1, __n0, __d): SPARC v8 variant.  Moves __n1
   into the Y register, issues three nops, then "udiv" of __n0 by __d into
   __q.  __r is recomputed as __n0 minus the "umul" result of __q and __d.
   Both outputs are earlyclobber because inputs are still read after they
   have been written.  */
1105 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1106   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1107            : "=&r" ((USItype) (__q)),                                   \
1108              "=&r" ((USItype) (__r))                                    \
1109            : "r" ((USItype) (__n1)),                                    \
1110              "r" ((USItype) (__n0)),                                    \
1111              "r" ((USItype) (__d)))
1112 #else
1113 #if defined (__sparclite__)
1114 /* This has hardware multiply but not divide.  It also has two additional
1115    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (w1, w0, u, v): sparclite variant.  The asm is the same
   "umul" + "rd %y" pair used in the __sparc_v8__ branch: "umul" writes W0
   and Y is read back into W1.  */
1116 #define umul_ppmm(w1, w0, u, v) \
1117   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1118            : "=r" ((USItype) (w1)),                                     \
1119              "=r" ((USItype) (w0))                                      \
1120            : "r" ((USItype) (u)),                                       \
1121              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d): sparclite variant.  Writes N1 to the Y
   register, then runs a chain of 32 "divscc" steps that starts from N0,
   accumulates in %g1 and delivers the last step into Q.  R is then read
   back from Y.  The final "add" of D to R sits in the delay slot of the
   annulling "bl,a" branch, so it presumably executes only when that
   branch is taken.  %g1 is clobbered.
   NOTE(review): Q and R are plain "=r" outputs although D (%4) is still
   read by the final "add" after both have been written; confirm whether
   they should be earlyclobber.  */
1122 #define udiv_qrnnd(q, r, n1, n0, d) \
1123   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1124 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1125 "       tst     %%g0\n"                                                 \
1126 "       divscc  %3,%4,%%g1\n"                                           \
1127 "       divscc  %%g1,%4,%%g1\n"                                         \
1128 "       divscc  %%g1,%4,%%g1\n"                                         \
1129 "       divscc  %%g1,%4,%%g1\n"                                         \
1130 "       divscc  %%g1,%4,%%g1\n"                                         \
1131 "       divscc  %%g1,%4,%%g1\n"                                         \
1132 "       divscc  %%g1,%4,%%g1\n"                                         \
1133 "       divscc  %%g1,%4,%%g1\n"                                         \
1134 "       divscc  %%g1,%4,%%g1\n"                                         \
1135 "       divscc  %%g1,%4,%%g1\n"                                         \
1136 "       divscc  %%g1,%4,%%g1\n"                                         \
1137 "       divscc  %%g1,%4,%%g1\n"                                         \
1138 "       divscc  %%g1,%4,%%g1\n"                                         \
1139 "       divscc  %%g1,%4,%%g1\n"                                         \
1140 "       divscc  %%g1,%4,%%g1\n"                                         \
1141 "       divscc  %%g1,%4,%%g1\n"                                         \
1142 "       divscc  %%g1,%4,%%g1\n"                                         \
1143 "       divscc  %%g1,%4,%%g1\n"                                         \
1144 "       divscc  %%g1,%4,%%g1\n"                                         \
1145 "       divscc  %%g1,%4,%%g1\n"                                         \
1146 "       divscc  %%g1,%4,%%g1\n"                                         \
1147 "       divscc  %%g1,%4,%%g1\n"                                         \
1148 "       divscc  %%g1,%4,%%g1\n"                                         \
1149 "       divscc  %%g1,%4,%%g1\n"                                         \
1150 "       divscc  %%g1,%4,%%g1\n"                                         \
1151 "       divscc  %%g1,%4,%%g1\n"                                         \
1152 "       divscc  %%g1,%4,%%g1\n"                                         \
1153 "       divscc  %%g1,%4,%%g1\n"                                         \
1154 "       divscc  %%g1,%4,%%g1\n"                                         \
1155 "       divscc  %%g1,%4,%%g1\n"                                         \
1156 "       divscc  %%g1,%4,%%g1\n"                                         \
1157 "       divscc  %%g1,%4,%0\n"                                           \
1158 "       rd      %%y,%1\n"                                               \
1159 "       bl,a 1f\n"                                                      \
1160 "       add     %1,%4,%1\n"                                             \
1161 "1:     ! End of inline udiv_qrnnd"                                     \
1162            : "=r" ((USItype) (q)),                                      \
1163              "=r" ((USItype) (r))                                       \
1164            : "r" ((USItype) (n1)),                                      \
1165              "r" ((USItype) (n0)),                                      \
1166              "rI" ((USItype) (d))                                       \
1167            : "g1" __AND_CLOBBER_CC)
1168 #define UDIV_TIME 37
/* count_leading_zeros (count, x): sparclite variant.  A single "scan"
   instruction with X as first operand and the immediate 1 as second
   operand writes COUNT.  */
1169 #define count_leading_zeros(count, x) \
1170   do {                                                                  \
1171   __asm__ ("scan %1,1,%0"                                               \
1172            : "=r" ((USItype) (count))                                   \
1173            : "r" ((USItype) (x)));                                      \
1174   } while (0)
1175 /* Early sparclites return 63 for an argument of 0, but they warn that future
1176    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1177    undefined.  */
1178 #else
1179 /* SPARC without integer multiplication and divide instructions.
1180    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (w1, w0, u, v): software multiply.  U is written to the Y
   register and %o5 is preloaded with U masked by V shifted right
   arithmetically by 31, i.e. U when V has its top bit set and 0 otherwise.
   "andcc" then zeroes %g1 and sets the condition codes, after which 32
   "mulscc" steps with V and one final step with 0 run in %g1.  W1 is
   %g1 + %o5 and W0 is read back from Y.  %g1 and %o5 are clobbered; the
   "%" on U marks U and V as commutative.  */
1181 #define umul_ppmm(w1, w0, u, v) \
1182   __asm__ ("! Inlined umul_ppmm\n"                                      \
1183 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1184 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1185 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1186 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1187 "       mulscc  %%g1,%3,%%g1\n"                                         \
1188 "       mulscc  %%g1,%3,%%g1\n"                                         \
1189 "       mulscc  %%g1,%3,%%g1\n"                                         \
1190 "       mulscc  %%g1,%3,%%g1\n"                                         \
1191 "       mulscc  %%g1,%3,%%g1\n"                                         \
1192 "       mulscc  %%g1,%3,%%g1\n"                                         \
1193 "       mulscc  %%g1,%3,%%g1\n"                                         \
1194 "       mulscc  %%g1,%3,%%g1\n"                                         \
1195 "       mulscc  %%g1,%3,%%g1\n"                                         \
1196 "       mulscc  %%g1,%3,%%g1\n"                                         \
1197 "       mulscc  %%g1,%3,%%g1\n"                                         \
1198 "       mulscc  %%g1,%3,%%g1\n"                                         \
1199 "       mulscc  %%g1,%3,%%g1\n"                                         \
1200 "       mulscc  %%g1,%3,%%g1\n"                                         \
1201 "       mulscc  %%g1,%3,%%g1\n"                                         \
1202 "       mulscc  %%g1,%3,%%g1\n"                                         \
1203 "       mulscc  %%g1,%3,%%g1\n"                                         \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,%3,%%g1\n"                                         \
1214 "       mulscc  %%g1,%3,%%g1\n"                                         \
1215 "       mulscc  %%g1,%3,%%g1\n"                                         \
1216 "       mulscc  %%g1,%3,%%g1\n"                                         \
1217 "       mulscc  %%g1,%3,%%g1\n"                                         \
1218 "       mulscc  %%g1,%3,%%g1\n"                                         \
1219 "       mulscc  %%g1,0,%%g1\n"                                          \
1220 "       add     %%g1,%%o5,%0\n"                                         \
1221 "       rd      %%y,%1"                                                 \
1222            : "=r" ((USItype) (w1)),                                     \
1223              "=r" ((USItype) (w0))                                      \
1224            : "%rI" ((USItype) (u)),                                     \
1225              "r" ((USItype) (v))                                                \
1226            : "g1", "o5" __AND_CLOBBER_CC)
1227 #define UMUL_TIME 39            /* 39 instructions */
1228 /* It's quite necessary to add this much assembler for the sparc.
1229    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (__q, __r, __n1, __n0, __d): shift-and-subtract loop.
   Operand %0 starts as __n0 and ends as __q, operand %1 starts as __n1 and
   ends as __r (matching constraints "0" and "1"); %g1 is loaded with 32
   and counted down by the "subcc %g1,1,%g1" instructions.  Bits are
   shifted into %0 by the addxcc/addcc instructions and the final
   "xnor %0,0,%0" complements the accumulated value.  Each instruction
   written with an extra leading space directly follows a branch, i.e. it
   sits in that branch's delay slot.  %g1 is clobbered.  */
1230 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1231   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1232 "       mov     32,%%g1\n"                                              \
1233 "       subcc   %1,%2,%%g0\n"                                           \
1234 "1:     bcs     5f\n"                                                   \
1235 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1236 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1237 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1238 "       subcc   %%g1,1,%%g1\n"                                          \
1239 "2:     bne     1b\n"                                                   \
1240 "        subcc  %1,%2,%%g0\n"                                           \
1241 "       bcs     3f\n"                                                   \
1242 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1243 "       b       3f\n"                                                   \
1244 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1245 "4:     sub     %1,%2,%1\n"                                             \
1246 "5:     addxcc  %1,%1,%1\n"                                             \
1247 "       bcc     2b\n"                                                   \
1248 "        subcc  %%g1,1,%%g1\n"                                          \
1249 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1250 "       bne     4b\n"                                                   \
1251 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1252 "       sub     %1,%2,%1\n"                                             \
1253 "3:     xnor    %0,0,%0\n"                                              \
1254 "       ! End of inline udiv_qrnnd"                                     \
1255            : "=&r" ((USItype) (__q)),                                   \
1256              "=&r" ((USItype) (__r))                                    \
1257            : "r" ((USItype) (__d)),                                     \
1258              "1" ((USItype) (__n1)),                                    \
1259              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1260 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1261 #endif /* __sparclite__ */
1262 #endif /* __sparc_v8__ */
1263 #endif /* sparc32 */
1264
1265 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1266     && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl): 64-bit SPARC variant.  "addcc"
   forms SL from AL and BL and a plain "add" forms SH from AH and BH.  The
   annulling "bcs,a,pn %xcc" branch carries "add %0, 1, %0" in its delay
   slot, so SH is presumably incremented only when the 64-bit carry is
   set.  SL is earlyclobber because it is written before AH and BH are
   read.  */
1267 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1268   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1269            "add %r2,%3,%0\n\t"                                          \
1270            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1271            "add %0, 1, %0\n"                                            \
1272            "1:"                                                         \
1273            : "=r" ((UDItype)(sh)),                                      \
1274              "=&r" ((UDItype)(sl))                                      \
1275            : "%rJ" ((UDItype)(ah)),                                     \
1276              "rI" ((UDItype)(bh)),                                      \
1277              "%rJ" ((UDItype)(al)),                                     \
1278              "rI" ((UDItype)(bl))                                       \
1279            __CLOBBER_CC)
1280
/* sub_ddmmss (sh, sl, ah, al, bh, bl): 64-bit SPARC variant.  "subcc"
   forms SL from AL and BL and a plain "sub" forms SH from AH and BH.  The
   annulling "bcs,a,pn %xcc" branch carries "sub %0, 1, %0" in its delay
   slot, so SH is presumably decremented only when the 64-bit borrow is
   set.  SL is earlyclobber because it is written before AH and BH are
   read.  */
1281 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1282   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1283            "sub %r2,%3,%0\n\t"                                          \
1284            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1285            "sub %0, 1, %0\n\t"                                          \
1286            "1:"                                                         \
1287            : "=r" ((UDItype)(sh)),                                      \
1288              "=&r" ((UDItype)(sl))                                      \
1289            : "rJ" ((UDItype)(ah)),                                      \
1290              "rI" ((UDItype)(bh)),                                      \
1291              "rJ" ((UDItype)(al)),                                      \
1292              "rI" ((UDItype)(bl))                                       \
1293            __CLOBBER_CC)
1294
/* umul_ppmm (wh, wl, u, v): 64-bit SPARC variant.  Splits U and V into
   32-bit halves ("srl ...,0" for the low half, "srlx ...,32" for the high
   half) and combines partial products from "mulx" into WL and WH, using
   four scratch registers bound to tmp1..tmp4.  Every output except WH is
   earlyclobber, and the asm is marked __volatile__.
   NOTE(review): the temporaries are declared without a reserved-identifier
   prefix, so an argument expression that mentions a caller variable named
   tmp1..tmp4 would bind to these locals instead.  */
1295 #define umul_ppmm(wh, wl, u, v)                                         \
1296   do {                                                                  \
1297           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1298           __asm__ __volatile__ (                                        \
1299                    "srl %7,0,%3\n\t"                                    \
1300                    "mulx %3,%6,%1\n\t"                                  \
1301                    "srlx %6,32,%2\n\t"                                  \
1302                    "mulx %2,%3,%4\n\t"                                  \
1303                    "sllx %4,32,%5\n\t"                                  \
1304                    "srl %6,0,%3\n\t"                                    \
1305                    "sub %1,%5,%5\n\t"                                   \
1306                    "srlx %5,32,%5\n\t"                                  \
1307                    "addcc %4,%5,%4\n\t"                                 \
1308                    "srlx %7,32,%5\n\t"                                  \
1309                    "mulx %3,%5,%3\n\t"                                  \
1310                    "mulx %2,%5,%5\n\t"                                  \
1311                    "sethi %%hi(0x80000000),%2\n\t"                      \
1312                    "addcc %4,%3,%4\n\t"                                 \
1313                    "srlx %4,32,%4\n\t"                                  \
1314                    "add %2,%2,%2\n\t"                                   \
1315                    "movcc %%xcc,%%g0,%2\n\t"                            \
1316                    "addcc %5,%4,%5\n\t"                                 \
1317                    "sllx %3,32,%3\n\t"                                  \
1318                    "add %1,%3,%1\n\t"                                   \
1319                    "add %5,%2,%0"                                       \
1320            : "=r" ((UDItype)(wh)),                                      \
1321              "=&r" ((UDItype)(wl)),                                     \
1322              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1323            : "r" ((UDItype)(u)),                                        \
1324              "r" ((UDItype)(v))                                         \
1325            __CLOBBER_CC);                                               \
1326   } while (0)
1327 #define UMUL_TIME 96
1328 #define UDIV_TIME 230
1329 #endif /* sparc64 */
1330
1331 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): VAX variant.  AH and AL are tied to
   the outputs through matching constraints, so "addl2" adds BL into SL in
   place and "adwc" then adds BH into SH (presumably together with the
   carry from addl2).  SL is earlyclobber; operands use the general "g"
   constraint.  */
1332 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1333   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1334            : "=g" ((USItype) (sh)),                                     \
1335              "=&g" ((USItype) (sl))                                     \
1336            : "%0" ((USItype) (ah)),                                     \
1337              "g" ((USItype) (bh)),                                      \
1338              "%1" ((USItype) (al)),                                     \
1339              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): VAX variant.  AH and AL are tied to
   the outputs through matching constraints, so "subl2" subtracts BL from
   SL in place and "sbwc" then subtracts BH from SH (presumably together
   with the borrow from subl2).  SL is earlyclobber.  */
1340 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1341   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1342            : "=g" ((USItype) (sh)),                                     \
1343              "=&g" ((USItype) (sl))                                     \
1344            : "0" ((USItype) (ah)),                                      \
1345              "g" ((USItype) (bh)),                                      \
1346              "1" ((USItype) (al)),                                      \
1347              "g" ((USItype) (bl)))
/* umul_ppmm (xh, xl, m0, m1): VAX variant.  M0 and M1 are copied once into
   USItype locals.  "emul" stores its result in a UDItype that is read back
   through a union as two USItype halves (__l declared first, __h second).
   XH is then adjusted by adding M1 when M0 has its top bit set and M0 when
   M1 has its top bit set, which presumably converts a signed product into
   the unsigned one.  The masks rely on ">>" of a negative SItype
   replicating the sign bit.  */
1348 #define umul_ppmm(xh, xl, m0, m1) \
1349   do {                                                                  \
1350     union {                                                             \
1351         UDItype __ll;                                                   \
1352         struct {USItype __l, __h;} __i;                                 \
1353       } __xx;                                                           \
1354     USItype __m0 = (m0), __m1 = (m1);                                   \
1355     __asm__ ("emul %1,%2,$0,%0"                                         \
1356              : "=r" (__xx.__ll)                                         \
1357              : "g" (__m0),                                              \
1358                "g" (__m1));                                             \
1359     (xh) = __xx.__i.__h;                                                \
1360     (xl) = __xx.__i.__l;                                                \
1361     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1362              + (((SItype) __m1 >> 31) & __m0));                         \
1363   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d): VAX variant.  N1 and N0 are packed into a
   DItype through a union (N0 into __l, N1 into __h) and handed to "ediv"
   together with D; Q and R receive the instruction's two results.  N1 and
   N0 are used unparenthesized as the right-hand side of an assignment.  */
1364 #define sdiv_qrnnd(q, r, n1, n0, d) \
1365   do {                                                                  \
1366     union {DItype __ll;                                                 \
1367            struct {SItype __l, __h;} __i;                               \
1368           } __xx;                                                       \
1369     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1370     __asm__ ("ediv %3,%2,%0,%1"                                         \
1371              : "=g" (q), "=g" (r)                                       \
1372              : "g" (__xx.__ll), "g" (d));                               \
1373   } while (0)
1374 #endif /* __vax__ */
1375
1376 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1377 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1378    to expand builtin functions depending on what configuration features
1379    are available.  This avoids library calls when the operation can be
1380    performed in-line.  */
/* umul_ppmm (w1, w0, u, v): stores __builtin_umulsidi3 (u, v) in a local
   DWunion and copies its s.high and s.low members to W1 and W0.  W1 and W0
   are used unparenthesized as assignment targets.  */
1381 #define umul_ppmm(w1, w0, u, v)                                         \
1382   do {                                                                  \
1383     DWunion __w;                                                        \
1384     __w.ll = __builtin_umulsidi3 (u, v);                                \
1385     w1 = __w.s.high;                                                    \
1386     w0 = __w.s.low;                                                     \
1387   } while (0)
/* Thin wrappers: __umulsidi3 forwards to __builtin_umulsidi3, and the two
   counting macros assign the result of __builtin_clz / __builtin_ctz of X
   to COUNT.  */
1388 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1389 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1390 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1391 #endif /* __xtensa__ */
1392
1393 #if defined xstormy16
1394 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros (count, x): xstormy16 variant.  Walks X from its most
   significant 16-bit chunk downwards, adding __clzhi2 of each chunk to
   COUNT and stopping at the first chunk whose count is not 16.  Provided
   __clzhi2 returns 16 for a zero chunk, a zero X yields W_TYPE_SIZE, which
   is what COUNT_LEADING_ZEROS_0 is defined as below.  X is expanded once
   per loop iteration.
   NOTE(review): the prototype above names __stormy16_count_leading_zeros
   while the loop calls __clzhi2, which is not declared in this part of
   the file -- confirm which symbol is intended.  */
1395 #define count_leading_zeros(count, x)                                   \
1396   do                                                                    \
1397     {                                                                   \
1398       UHItype size;                                                     \
1399                                                                         \
1400       /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
1401       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)           \
1402         {                                                               \
1403           UHItype c;                                                    \
1404                                                                         \
1405           c = __clzhi2 ((x) >> (size - 16));                            \
1406           (count) += c;                                                 \
1407           if (c != 16)                                                  \
1408             break;                                                      \
1409         }                                                               \
1410     }                                                                   \
1411   while (0)
1412 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1413 #endif
1414
1415 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (sh, sl, ah, al, bh, bl): Z8000 variant for 16-bit words.  AH
   and AL are tied to the outputs through matching constraints; "add" adds
   BL into SL and "adc" then adds BH into SH (presumably together with the
   carry).  BL may additionally satisfy the "Q" or "R" constraints.  SL is
   earlyclobber.  */
1416 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1417   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1418            : "=r" ((unsigned int)(sh)),                                 \
1419              "=&r" ((unsigned int)(sl))                                 \
1420            : "%0" ((unsigned int)(ah)),                                 \
1421              "r" ((unsigned int)(bh)),                                  \
1422              "%1" ((unsigned int)(al)),                                 \
1423              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): Z8000 variant for 16-bit words.  AH
   and AL are tied to the outputs through matching constraints; "sub"
   subtracts BL from SL and "sbc" then subtracts BH from SH (presumably
   together with the borrow).  SL is earlyclobber.  */
1424 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1425   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1426            : "=r" ((unsigned int)(sh)),                                 \
1427              "=&r" ((unsigned int)(sl))                                 \
1428            : "0" ((unsigned int)(ah)),                                  \
1429              "r" ((unsigned int)(bh)),                                  \
1430              "1" ((unsigned int)(al)),                                  \
1431              "rQR" ((unsigned int)(bl)))
/* umul_ppmm (xh, xl, m0, m1): Z8000 variant.  M0 and M1 are copied once
   into unsigned int locals.  "mult" leaves its result in the __h/__l pair
   of a local union, with M0 preloaded into __l through the matching
   constraint "%1".  XH is then adjusted by adding M1 when M0 has bit 15
   set and M0 when M1 has bit 15 set, presumably turning a signed product
   into the unsigned one.  The union's __ll member is never accessed by
   this macro.  */
1432 #define umul_ppmm(xh, xl, m0, m1) \
1433   do {                                                                  \
1434     union {long int __ll;                                               \
1435            struct {unsigned int __h, __l;} __i;                         \
1436           } __xx;                                                       \
1437     unsigned int __m0 = (m0), __m1 = (m1);                              \
1438     __asm__ ("mult      %S0,%H3"                                        \
1439              : "=r" (__xx.__i.__h),                                     \
1440                "=r" (__xx.__i.__l)                                      \
1441              : "%1" (__m0),                                             \
1442                "rQR" (__m1));                                           \
1443     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1444     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1445              + (((signed int) __m1 >> 15) & __m0));                     \
1446   } while (0)
1447 #endif /* __z8000__ */
1448
1449 #endif /* __GNUC__ */
1450
1451 /* If this machine has no inline assembler, use C macros.  */
1452
1453 #if !defined (add_ssaaaa)
/* add_ssaaaa (sh, sl, ah, al, bh, bl): portable fallback, used only when no
   earlier definition exists.  The low-word sum is kept in a UWtype
   temporary; that sum comparing below AL is taken as the carry and added
   into SH.  SL is stored last, so AL is still intact when the carry test
   reads it even if SL and AL name the same object.  AL is evaluated
   twice.  */
1454 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1455   do {                                                                  \
1456     UWtype __x;                                                         \
1457     __x = (al) + (bl);                                                  \
1458     (sh) = (ah) + (bh) + (__x < (al));                                  \
1459     (sl) = __x;                                                         \
1460   } while (0)
1461 #endif
1462
1463 #if !defined (sub_ddmmss)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): portable fallback, used only when no
   earlier definition exists.  The low-word difference is kept in a UWtype
   temporary; that difference comparing above AL is taken as the borrow
   and subtracted from SH.  SL is stored last, so AL is still intact when
   the borrow test reads it even if SL and AL name the same object.  AL is
   evaluated twice.  */
1464 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1465   do {                                                                  \
1466     UWtype __x;                                                         \
1467     __x = (al) - (bl);                                                  \
1468     (sh) = (ah) - (bh) - (__x > (al));                                  \
1469     (sl) = __x;                                                         \
1470   } while (0)
1471 #endif
1472
1473 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1474    smul_ppmm.  */
1475 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* U and V are copied once into UWtype locals and passed to smul_ppmm.  The
   high word it returns is corrected by adding V when U has its top bit set
   and U when V has its top bit set: each "-(x >> (W_TYPE_SIZE - 1))" is
   either zero or an all-ones mask.  W0 is handed to smul_ppmm
   unparenthesized.  */
1476 #define umul_ppmm(w1, w0, u, v)                                         \
1477   do {                                                                  \
1478     UWtype __w1;                                                        \
1479     UWtype __xm0 = (u), __xm1 = (v);                                    \
1480     smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
1481     (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
1482                 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
1483   } while (0)
1484 #endif
1485
1486 /* If we still don't have umul_ppmm, define it using plain C.  */
1487 #if !defined (umul_ppmm)
/* Schoolbook multiplication on half words.  U and V are split with
   __ll_lowpart/__ll_highpart and the four partial products are formed in
   UWtype.  The two middle products are summed into __x1 together with the
   high half of __x0; a wrap of that sum is detected by comparing it with
   __x2 and repaid by adding __ll_B to __x3.  W1 is __x3 plus the high half
   of __x1, W0 is the low half of __x1 scaled by __ll_B plus the low half
   of __x0.  U and V each appear twice in the expansion.  */
1488 #define umul_ppmm(w1, w0, u, v)                                         \
1489   do {                                                                  \
1490     UWtype __x0, __x1, __x2, __x3;                                      \
1491     UHWtype __ul, __vl, __uh, __vh;                                     \
1492                                                                         \
1493     __ul = __ll_lowpart (u);                                            \
1494     __uh = __ll_highpart (u);                                           \
1495     __vl = __ll_lowpart (v);                                            \
1496     __vh = __ll_highpart (v);                                           \
1497                                                                         \
1498     __x0 = (UWtype) __ul * __vl;                                        \
1499     __x1 = (UWtype) __ul * __vh;                                        \
1500     __x2 = (UWtype) __uh * __vl;                                        \
1501     __x3 = (UWtype) __uh * __vh;                                        \
1502                                                                         \
1503     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
1504     __x1 += __x2;               /* but this indeed can */               \
1505     if (__x1 < __x2)            /* did we get it? */                    \
1506       __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
1507                                                                         \
1508     (w1) = __x3 + __ll_highpart (__x1);                                 \
1509     (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
1510   } while (0)
1511 #endif
1512
1513 #if !defined (__umulsidi3)
/* __umulsidi3 (u, v): fallback built on umul_ppmm, used only when no
   earlier definition exists.  A GNU statement expression lets umul_ppmm
   fill the s.high and s.low members of a local DWunion and then yields
   its ll member as the value of the macro.  */
1514 #define __umulsidi3(u, v) \
1515   ({DWunion __w;                                                        \
1516     umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
1517     __w.ll; })
1518 #endif
1519
1520 /* Define this unconditionally, so it can be used for debugging.
        __udiv_qrnnd_c (q, r, n1, n0, d) is the plain-C division step: it
        divides the two-word value n1:n0 by the single word d, producing one
        half-word quotient digit at a time, and stores the quotient in q and
        the remainder in r.  n1, n0 and d are each expanded more than once,
        so they should be free of side effects.
        NOTE(review): the fallback that maps udiv_qrnnd onto this macro also
        sets UDIV_NEEDS_NORMALIZATION to 1; presumably d must have its most
        significant bit set and n1 must be smaller than d -- confirm against
        the interface notes at the top of the file.  */
1521 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
1522   do {                                                                  \
1523     UWtype __d1, __d0, __q1, __q0;                                      \
1524     UWtype __r1, __r0, __m;                                             \
1525     __d1 = __ll_highpart (d);        /* high half of the divisor */     \
1526     __d0 = __ll_lowpart (d);         /* low half of the divisor */      \
1527                                                                         \
1528     __r1 = (n1) % __d1;              /* first digit: estimate it */     \
1529     __q1 = (n1) / __d1;              /* from n1 and __d1 alone */       \
1530     __m = (UWtype) __q1 * __d0;      /* what __d0 still contributes */  \
1531     __r1 = __r1 * __ll_B | __ll_highpart (n0); /* bring down high n0 */ \
1532     if (__r1 < __m)                  /* estimate was too large */       \
1533       {                                                                 \
1534         __q1--, __r1 += (d);                                            \
1535         if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1536           if (__r1 < __m)            /* still too large: step again */  \
1537             __q1--, __r1 += (d);                                        \
1538       }                                                                 \
1539     __r1 -= __m;                     /* remainder after first digit */  \
1540                                                                         \
1541     __r0 = __r1 % __d1;              /* second digit: same steps, */    \
1542     __q0 = __r1 / __d1;              /* starting from __r1 */           \
1543     __m = (UWtype) __q0 * __d0;                                         \
1544     __r0 = __r0 * __ll_B | __ll_lowpart (n0);  /* bring down low n0 */  \
1545     if (__r0 < __m)                                                     \
1546       {                                                                 \
1547         __q0--, __r0 += (d);                                            \
1548         if (__r0 >= (d))             /* no wrap-around in the add */    \
1549           if (__r0 < __m)                                               \
1550             __q0--, __r0 += (d);                                        \
1551       }                                                                 \
1552     __r0 -= __m;                     /* final remainder */              \
1553                                                                         \
1554     (q) = (UWtype) __q1 * __ll_B | __q0;  /* join the two digits */     \
1555     (r) = __r0;                                                         \
1556   } while (0)
1557
1558 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1559    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
1560 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1561 #define udiv_qrnnd(q, r, nh, nl, d) \
1562   do {                                                                  \
1563     USItype __r;                                                        \
1564     (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
1565     (r) = __r;                                                          \
1566   } while (0)
1567 #endif
1568
1569 /* Still no udiv_qrnnd?  Use the C macro and set UDIV_NEEDS_NORMALIZATION.  */
1570 #ifndef udiv_qrnnd
1571 #define udiv_qrnnd __udiv_qrnnd_c
1572 #define UDIV_NEEDS_NORMALIZATION 1
1573 #endif
1574
1575 #if !defined (count_leading_zeros)
1576 #define count_leading_zeros(count, x) \
1577   do {                                                                  \
1578     UWtype __xr = (x);                                                  \
1579     UWtype __a;                                                         \
1580                                                                         \
1581     if (W_TYPE_SIZE <= 32)                                              \
1582       {                                                                 \
1583         __a = __xr < ((UWtype)1<<2*__BITS4)                             \
1584           ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
1585           : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
1586       }                                                                 \
1587     else                                                                \
1588       {                                                                 \
1589         for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
1590           if (((__xr >> __a) & 0xff) != 0)                              \
1591             break;                                                      \
1592       }                                                                 \
1593                                                                         \
1594     (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
1595   } while (0)
1596 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1597 #endif
1598
1599 #if !defined (count_trailing_zeros)
1600 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1601    defined in asm, but if it is not, the C version above is good enough.  */
1602 #define count_trailing_zeros(count, x) \
1603   do {                                                                  \
1604     UWtype __ctz_x = (x);                                               \
1605     UWtype __ctz_c;                                                     \
1606     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
1607     (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
1608   } while (0)
1609 #endif
1610
1611 #if !defined (UDIV_NEEDS_NORMALIZATION)
1612 #define UDIV_NEEDS_NORMALIZATION 0  /* default when nothing above set it */
1613 #endif