2014-05-01 Steve Ellcey <sellcey@mips.com>
[external/binutils.git] / include / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991-2014 Free Software Foundation, Inc.
3
4    This file is part of the GNU C Library.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    In addition to the permissions in the GNU Lesser General Public
12    License, the Free Software Foundation gives you unlimited
13    permission to link the compiled version of this file into
14    combinations with other programs, and to distribute those
15    combinations without any restriction coming from the use of this
16    file.  (The Lesser General Public License restrictions do apply in
17    other respects; for example, they cover modification of the file,
18    and distribution when not linked into a combine executable.)
19
20    The GNU C Library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with the GNU C Library; if not, see
27    <http://www.gnu.org/licenses/>.  */
28
29 /* You have to define the following before including this file:
30
31    UWtype -- An unsigned type, default type for operations (typically a "word")
32    UHWtype -- An unsigned type, at least half the size of UWtype.
33    UDWtype -- An unsigned type, at least twice as large as UWtype.
34    W_TYPE_SIZE -- size in bits of UWtype
35
36    UQItype -- Unsigned 8 bit type.
37    SItype, USItype -- Signed and unsigned 32 bit types.
38    DItype, UDItype -- Signed and unsigned 64 bit types.
39
40    On a 32 bit machine UWtype should typically be USItype;
41    on a 64 bit machine, UWtype should typically be UDItype.  */
42
/* Helpers for splitting a UWtype into two half-words.
   __BITS4 is one quarter of the word width, in bits.
   __ll_B is 2**(W_TYPE_SIZE/2), i.e. the numeric base of one half-word.
   __ll_lowpart(t) keeps the low W_TYPE_SIZE/2 bits of T.
   __ll_highpart(t) shifts the high W_TYPE_SIZE/2 bits of T down to bit 0.
   Macro bodies are expanded at the point of use, so W_TYPE_SIZE and UWtype
   only have to be defined by then, not before these lines.  */
43 #define __BITS4 (W_TYPE_SIZE / 4)
44 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
45 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
46 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
47
/* Defaults used when the includer did not define W_TYPE_SIZE: a 32-bit
   word (USItype) with a 64-bit double word (UDItype).  Note that UHWtype
   is set to USItype as well, i.e. the same type as UWtype.  */
48 #ifndef W_TYPE_SIZE
49 #define W_TYPE_SIZE     32
50 #define UWtype          USItype
51 #define UHWtype         USItype
52 #define UDWtype         UDItype
53 #endif
54
55 /* Used in glibc only.  */
56 #ifndef attribute_hidden
57 #define attribute_hidden
58 #endif
59
/* 256-entry table of UQItype values; only declared here, the definition
   lives elsewhere.  Within this file it is indexed by the Alpha
   count_leading_zeros fallback (the variant used without __alpha_cix__).  */
60 extern const UQItype __clz_tab[256] attribute_hidden;
61
62 /* Define auxiliary asm macros.
63
64    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
66    word product in HIGH_PROD and LOW_PROD.
67
68    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69    UDWtype product.  This is just a variant of umul_ppmm.
70
71    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72    denominator) divides a UDWtype, composed by the UWtype integers
73    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
75    than DENOMINATOR for correct operation.  If, in addition, the most
76    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
77    UDIV_NEEDS_NORMALIZATION is defined to 1.
78
79    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
80    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
81    is rounded towards 0.
82
83    5) count_leading_zeros(count, x) counts the number of zero-bits from the
84    msb to the first nonzero bit in the UWtype X.  This is the number of
85    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
86    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
87
88    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89    from the least significant end.
90
91    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
92    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
93    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
95    (i.e. carry out) is not stored anywhere, and is lost.
96
97    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
100    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
101    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
102    and is lost.
103
104    If any of these macros are left undefined for a particular CPU,
105    C macros are used.  */
106
107 /* The CPUs come in alphabetical order below.
108
109    Please add support for more CPUs here, or improve the current support
110    for the CPUs below!
111    (E.g. WE32100, IBM360.)  */
112
113 #if defined (__GNUC__) && !defined (NO_ASM)
114
115 /* We sometimes need to clobber "cc" with gcc2, but that would not be
116    understood by gcc1.  Use cpp to avoid major code duplication.  */
/* __CLOBBER_CC opens an asm clobber list (it supplies the leading colon);
   __AND_CLOBBER_CC extends a clobber list that is already open (it supplies
   the leading comma).  Both expand to nothing when __GNUC__ < 2.  */
117 #if __GNUC__ < 2
118 #define __CLOBBER_CC
119 #define __AND_CLOBBER_CC
120 #else /* __GNUC__ >= 2 */
121 #define __CLOBBER_CC : "cc"
122 #define __AND_CLOBBER_CC , "cc"
123 #endif /* __GNUC__ < 2 */
124
125 #if defined (__aarch64__)
126
/* 32-bit word on aarch64: both counts are delegated to the GCC builtins
   that take an unsigned int.  COUNT_LEADING_ZEROS_0 declares 32 as the
   count_leading_zeros result for X == 0 on this target.  */
127 #if W_TYPE_SIZE == 32
128 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
129 #define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctz (X))
130 #define COUNT_LEADING_ZEROS_0 32
131 #endif /* W_TYPE_SIZE == 32 */
132
/* 64-bit word on aarch64: both counts are delegated to the GCC builtins
   that take an unsigned long long.  COUNT_LEADING_ZEROS_0 declares 64 as
   the count_leading_zeros result for X == 0 on this target.  */
133 #if W_TYPE_SIZE == 64
134 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clzll (X))
135 #define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctzll (X))
136 #define COUNT_LEADING_ZEROS_0 64
137 #endif /* W_TYPE_SIZE == 64 */
138
139 #endif /* __aarch64__ */
140
141 #if defined (__alpha) && W_TYPE_SIZE == 64
/* Alpha umul_ppmm: M0 and M1 are first copied into UDItype temporaries, so
   each argument is evaluated exactly once.  PH receives the result of
   __builtin_alpha_umulh on the two temporaries and PL receives their
   ordinary (truncating) UDItype product.  */
142 #define umul_ppmm(ph, pl, m0, m1) \
143   do {                                                                  \
144     UDItype __m0 = (m0), __m1 = (m1);                                   \
145     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
146     (pl) = __m0 * __m1;                                                 \
147   } while (0)
148 #define UMUL_TIME 46
149 #ifndef LONGLONG_STANDALONE
150 #define udiv_qrnnd(q, r, n1, n0, d) \
151   do { UDItype __r;                                                     \
152     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
153     (r) = __r;                                                          \
154   } while (0)
155 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
156 #define UDIV_TIME 220
157 #endif /* LONGLONG_STANDALONE */
158 #ifdef __alpha_cix__
159 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
160 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
161 #define COUNT_LEADING_ZEROS_0 64
162 #else
163 #define count_leading_zeros(COUNT,X) \
164   do {                                                                  \
165     UDItype __xr = (X), __t, __a;                                       \
166     __t = __builtin_alpha_cmpbge (0, __xr);                             \
167     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
168     __t = __builtin_alpha_extbl (__xr, __a);                            \
169     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
170   } while (0)
171 #define count_trailing_zeros(COUNT,X) \
172   do {                                                                  \
173     UDItype __xr = (X), __t, __a;                                       \
174     __t = __builtin_alpha_cmpbge (0, __xr);                             \
175     __t = ~__t & -~__t;                                                 \
176     __a = ((__t & 0xCC) != 0) * 2;                                      \
177     __a += ((__t & 0xF0) != 0) * 4;                                     \
178     __a += ((__t & 0xAA) != 0);                                         \
179     __t = __builtin_alpha_extbl (__xr, __a);                            \
180     __a <<= 3;                                                          \
181     __t &= -__t;                                                        \
182     __a += ((__t & 0xCC) != 0) * 2;                                     \
183     __a += ((__t & 0xF0) != 0) * 4;                                     \
184     __a += ((__t & 0xAA) != 0);                                         \
185     (COUNT) = __a;                                                      \
186   } while (0)
187 #endif /* __alpha_cix__ */
188 #endif /* __alpha */
189
190 #if defined (__arc__) && W_TYPE_SIZE == 32
/* ARC add_ssaaaa.  Asm operands: %0 = sh, %1 = sl, %2 = ah, %3 = bh,
   %4 = al, %5 = bl.  The first instruction (add.f) combines the low words
   into sl, the second (adc) combines the high words into sh.  sl is marked
   earlyclobber ("=&r") because it is written by the first instruction while
   ah and bh are still to be read by the second.  The "%" on ah and al marks
   each as commutative with the operand that follows it.  */
191 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
192   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
193            : "=r" ((USItype) (sh)),                                     \
194              "=&r" ((USItype) (sl))                                     \
195            : "%r" ((USItype) (ah)),                                     \
196              "rIJ" ((USItype) (bh)),                                    \
197              "%r" ((USItype) (al)),                                     \
198              "rIJ" ((USItype) (bl)))
/* ARC sub_ddmmss.  Asm operands: %0 = sh, %1 = sl, %2 = ah, %3 = bh,
   %4 = al, %5 = bl.  The first instruction (sub.f) produces the low word in
   sl, the second (sbc) produces the high word in sh.  sl is marked
   earlyclobber ("=&r") because it is written while ah and bh are still to
   be read.  Unlike add_ssaaaa, ah and al carry no "%" modifier, since the
   operands of a subtraction cannot be swapped.  */
199 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
200   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
201            : "=r" ((USItype) (sh)),                                     \
202              "=&r" ((USItype) (sl))                                     \
203            : "r" ((USItype) (ah)),                                      \
204              "rIJ" ((USItype) (bh)),                                    \
205              "r" ((USItype) (al)),                                      \
206              "rIJ" ((USItype) (bl)))
207
/* Widening multiply in plain C: U and V are each converted to USItype and
   the product is formed in UDItype, so the full double-word result is
   kept.  Both arguments are parenthesized so that expression arguments
   such as `a + b' are converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u) * (USItype)(v))
/* ARC count_leading_zeros, provided only when __ARC_NORM__ is defined.
   The asm leaves a signed value in c_: norm.f computes it from X, and the
   conditional mov.mi then substitutes -1.  COUNT is that value plus one.
   The asm declares a "cc" clobber.  COUNT_LEADING_ZEROS_0 declares 32 as
   the result for X == 0.  */
209 #ifdef __ARC_NORM__
210 #define count_leading_zeros(count, x) \
211   do                                                                    \
212     {                                                                   \
213       SItype c_;                                                        \
214                                                                         \
215       __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
216       (count) = c_ + 1;                                                 \
217     }                                                                   \
218   while (0)
219 #define COUNT_LEADING_ZEROS_0 32
220 #endif
221 #endif
222
223 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
224  && W_TYPE_SIZE == 32
225 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
226   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
227            : "=r" ((USItype) (sh)),                                     \
228              "=&r" ((USItype) (sl))                                     \
229            : "%r" ((USItype) (ah)),                                     \
230              "rI" ((USItype) (bh)),                                     \
231              "%r" ((USItype) (al)),                                     \
232              "rI" ((USItype) (bl)) __CLOBBER_CC)
233 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
234   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
235            : "=r" ((USItype) (sh)),                                     \
236              "=&r" ((USItype) (sl))                                     \
237            : "r" ((USItype) (ah)),                                      \
238              "rI" ((USItype) (bh)),                                     \
239              "r" ((USItype) (al)),                                      \
240              "rI" ((USItype) (bl)) __CLOBBER_CC)
241 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
242      || defined(__ARM_ARCH_3__)
243 #  define umul_ppmm(xh, xl, a, b)                                       \
244   do {                                                                  \
245     register USItype __t0, __t1, __t2;                                  \
246     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
247            "    mov     %2, %5, lsr #16\n"                              \
248            "    mov     %0, %6, lsr #16\n"                              \
249            "    bic     %3, %5, %2, lsl #16\n"                          \
250            "    bic     %4, %6, %0, lsl #16\n"                          \
251            "    mul     %1, %3, %4\n"                                   \
252            "    mul     %4, %2, %4\n"                                   \
253            "    mul     %3, %0, %3\n"                                   \
254            "    mul     %0, %2, %0\n"                                   \
255            "    adds    %3, %4, %3\n"                                   \
256            "    addcs   %0, %0, #65536\n"                               \
257            "    adds    %1, %1, %3, lsl #16\n"                          \
258            "    adc     %0, %0, %3, lsr #16"                            \
259            : "=&r" ((USItype) (xh)),                                    \
260              "=r" ((USItype) (xl)),                                     \
261              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
262            : "r" ((USItype) (a)),                                       \
263              "r" ((USItype) (b)) __CLOBBER_CC );                        \
264   } while (0)
265 #  define UMUL_TIME 20
266 # else
267 #  define umul_ppmm(xh, xl, a, b)                                       \
268   do {                                                                  \
269     /* Generate umull, under compiler control.  */                      \
270     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
271     (xl) = (USItype)__t0;                                               \
272     (xh) = (USItype)(__t0 >> 32);                                       \
273   } while (0)
274 #  define UMUL_TIME 3
275 # endif
276 # define UDIV_TIME 100
277 #endif /* __arm__ */
278
279 #if defined(__arm__)
280 /* Let gcc decide how best to implement count_leading_zeros.  */
281 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
282 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
283 #define COUNT_LEADING_ZEROS_0 32
284 #endif
285
286 #if defined (__AVR__)
287
288 #if W_TYPE_SIZE == 16
289 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
290 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
291 #define COUNT_LEADING_ZEROS_0 16
292 #endif /* W_TYPE_SIZE == 16 */
293
294 #if W_TYPE_SIZE == 32
295 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
296 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
297 #define COUNT_LEADING_ZEROS_0 32
298 #endif /* W_TYPE_SIZE == 32 */
299
300 #if W_TYPE_SIZE == 64
301 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
302 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
303 #define COUNT_LEADING_ZEROS_0 64
304 #endif /* W_TYPE_SIZE == 64 */
305
306 #endif /* defined (__AVR__) */
307
308 #if defined (__CRIS__)
309
310 #if __CRIS_arch_version >= 3
311 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
312 #define COUNT_LEADING_ZEROS_0 32
313 #endif /* __CRIS_arch_version >= 3 */
314
315 #if __CRIS_arch_version >= 8
316 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
317 #endif /* __CRIS_arch_version >= 8 */
318
319 #if __CRIS_arch_version >= 10
320 #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
321 #else
322 #define __umulsidi3 __umulsidi3
323 extern UDItype __umulsidi3 (USItype, USItype);
324 #endif /* __CRIS_arch_version >= 10 */
325
/* CRIS umul_ppmm: form the UDItype product of U and V with __umulsidi3
   (a macro or an external function, depending on __CRIS_arch_version as
   selected just above), then split it: W0 receives the low 32 bits and W1
   the bits above them.  */
326 #define umul_ppmm(w1, w0, u, v)         \
327   do {                                  \
328     UDItype __x = __umulsidi3 (u, v);   \
329     (w0) = (USItype) (__x);             \
330     (w1) = (USItype) (__x >> 32);       \
331   } while (0)
332
333 /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
334    DFmode ("double" intrinsics, avoiding two of the three insns handling
335    carry), but defining them as open-code C composing and doing the
336    operation in DImode (UDImode) shows that the DImode needs work:
337    register pressure from requiring neighboring registers and the
338    traffic to and from them come to dominate, in the 4.7 series.  */
339
340 #endif /* defined (__CRIS__) */
341
342 #if defined (__hppa) && W_TYPE_SIZE == 32
343 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
344   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
345            : "=r" ((USItype) (sh)),                                     \
346              "=&r" ((USItype) (sl))                                     \
347            : "%rM" ((USItype) (ah)),                                    \
348              "rM" ((USItype) (bh)),                                     \
349              "%rM" ((USItype) (al)),                                    \
350              "rM" ((USItype) (bl)))
351 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
352   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
353            : "=r" ((USItype) (sh)),                                     \
354              "=&r" ((USItype) (sl))                                     \
355            : "rM" ((USItype) (ah)),                                     \
356              "rM" ((USItype) (bh)),                                     \
357              "rM" ((USItype) (al)),                                     \
358              "rM" ((USItype) (bl)))
359 #if defined (_PA_RISC1_1)
360 #define umul_ppmm(w1, w0, u, v) \
361   do {                                                                  \
362     union                                                               \
363       {                                                                 \
364         UDItype __f;                                                    \
365         struct {USItype __w1, __w0;} __w1w0;                            \
366       } __t;                                                            \
367     __asm__ ("xmpyu %1,%2,%0"                                           \
368              : "=x" (__t.__f)                                           \
369              : "x" ((USItype) (u)),                                     \
370                "x" ((USItype) (v)));                                    \
371     (w1) = __t.__w1w0.__w1;                                             \
372     (w0) = __t.__w1w0.__w0;                                             \
373      } while (0)
374 #define UMUL_TIME 8
375 #else
376 #define UMUL_TIME 30
377 #endif
378 #define UDIV_TIME 40
379 #define count_leading_zeros(count, x) \
380   do {                                                                  \
381     USItype __tmp;                                                      \
382     __asm__ (                                                           \
383        "ldi             1,%0\n"                                         \
384 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
385 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
386 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
387 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
388 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
389 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
390 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
391 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
392 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
393 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
394 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
395 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
396 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
397 "       sub             %0,%1,%0                ; Subtract it.\n"       \
398         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
399   } while (0)
400 #endif
401
402 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
403 #if !defined (__zarch__)
404 #define smul_ppmm(xh, xl, m0, m1) \
405   do {                                                                  \
406     union {DItype __ll;                                                 \
407            struct {USItype __h, __l;} __i;                              \
408           } __x;                                                        \
409     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
410              : "=&r" (__x.__ll)                                         \
411              : "r" (m0), "r" (m1));                                     \
412     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
413   } while (0)
414 #define sdiv_qrnnd(q, r, n1, n0, d) \
415   do {                                                                  \
416     union {DItype __ll;                                                 \
417            struct {USItype __h, __l;} __i;                              \
418           } __x;                                                        \
419     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
420     __asm__ ("dr %0,%2"                                                 \
421              : "=r" (__x.__ll)                                          \
422              : "0" (__x.__ll), "r" (d));                                \
423     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
424   } while (0)
425 #else
426 #define smul_ppmm(xh, xl, m0, m1) \
427   do {                                                                  \
428     register SItype __r0 __asm__ ("0");                                 \
429     register SItype __r1 __asm__ ("1") = (m0);                          \
430                                                                         \
431     __asm__ ("mr\t%%r0,%3"                                              \
432              : "=r" (__r0), "=r" (__r1)                                 \
433              : "r"  (__r1),  "r" (m1));                                 \
434     (xh) = __r0; (xl) = __r1;                                           \
435   } while (0)
436
437 #define sdiv_qrnnd(q, r, n1, n0, d) \
438   do {                                                                  \
439     register SItype __r0 __asm__ ("0") = (n1);                          \
440     register SItype __r1 __asm__ ("1") = (n0);                          \
441                                                                         \
442     __asm__ ("dr\t%%r0,%4"                                              \
443              : "=r" (__r0), "=r" (__r1)                                 \
444              : "r" (__r0), "r" (__r1), "r" (d));                        \
445     (q) = __r1; (r) = __r0;                                             \
446   } while (0)
447 #endif /* __zarch__ */
448 #endif
449
450 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
451 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
452   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
453            : "=r" ((USItype) (sh)),                                     \
454              "=&r" ((USItype) (sl))                                     \
455            : "%0" ((USItype) (ah)),                                     \
456              "g" ((USItype) (bh)),                                      \
457              "%1" ((USItype) (al)),                                     \
458              "g" ((USItype) (bl)))
459 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
460   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
461            : "=r" ((USItype) (sh)),                                     \
462              "=&r" ((USItype) (sl))                                     \
463            : "0" ((USItype) (ah)),                                      \
464              "g" ((USItype) (bh)),                                      \
465              "1" ((USItype) (al)),                                      \
466              "g" ((USItype) (bl)))
/* i386 umul_ppmm via a single mul.  Asm operands: %0 = w0 (low word, "a"
   register constraint), %1 = w1 (high word, "d" register constraint),
   %2 = u (tied to operand 0, and marked "%" as commutative with v),
   %3 = v (register or memory), which is the operand named in the
   instruction.  */
467 #define umul_ppmm(w1, w0, u, v) \
468   __asm__ ("mul{l} %3"                                                  \
469            : "=a" ((USItype) (w0)),                                     \
470              "=d" ((USItype) (w1))                                      \
471            : "%0" ((USItype) (u)),                                      \
472              "rm" ((USItype) (v)))
/* i386 udiv_qrnnd via a single div.  Asm operands: %0 = q ("a" register
   constraint), %1 = r ("d" register constraint), %2 = n0 (tied to operand
   0), %3 = n1 (tied to operand 1), %4 = dv (register or memory), which is
   the operand named in the instruction.  As stated in the description of
   udiv_qrnnd at the top of this file, N1 must be less than DV for correct
   operation.  */
473 #define udiv_qrnnd(q, r, n1, n0, dv) \
474   __asm__ ("div{l} %4"                                                  \
475            : "=a" ((USItype) (q)),                                      \
476              "=d" ((USItype) (r))                                       \
477            : "0" ((USItype) (n0)),                                      \
478              "1" ((USItype) (n1)),                                      \
479              "rm" ((USItype) (dv)))
480 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
481 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
482 #define UMUL_TIME 40
483 #define UDIV_TIME 40
484 #endif /* 80x86 */
485
486 #if defined (__x86_64__) && W_TYPE_SIZE == 64
487 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
488   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
489            : "=r" ((UDItype) (sh)),                                     \
490              "=&r" ((UDItype) (sl))                                     \
491            : "%0" ((UDItype) (ah)),                                     \
492              "rme" ((UDItype) (bh)),                                    \
493              "%1" ((UDItype) (al)),                                     \
494              "rme" ((UDItype) (bl)))
495 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
496   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
497            : "=r" ((UDItype) (sh)),                                     \
498              "=&r" ((UDItype) (sl))                                     \
499            : "0" ((UDItype) (ah)),                                      \
500              "rme" ((UDItype) (bh)),                                    \
501              "1" ((UDItype) (al)),                                      \
502              "rme" ((UDItype) (bl)))
503 #define umul_ppmm(w1, w0, u, v) \
504   __asm__ ("mul{q} %3"                                                  \
505            : "=a" ((UDItype) (w0)),                                     \
506              "=d" ((UDItype) (w1))                                      \
507            : "%0" ((UDItype) (u)),                                      \
508              "rm" ((UDItype) (v)))
509 #define udiv_qrnnd(q, r, n1, n0, dv) \
510   __asm__ ("div{q} %4"                                                  \
511            : "=a" ((UDItype) (q)),                                      \
512              "=d" ((UDItype) (r))                                       \
513            : "0" ((UDItype) (n0)),                                      \
514              "1" ((UDItype) (n1)),                                      \
515              "rm" ((UDItype) (dv)))
516 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
517 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
518 #define UMUL_TIME 40
519 #define UDIV_TIME 40
520 #endif /* x86_64 */
521
522 #if defined (__i960__) && W_TYPE_SIZE == 32
523 #define umul_ppmm(w1, w0, u, v) \
524   ({union {UDItype __ll;                                                \
525            struct {USItype __l, __h;} __i;                              \
526           } __xx;                                                       \
527   __asm__ ("emul        %2,%1,%0"                                       \
528            : "=d" (__xx.__ll)                                           \
529            : "%dI" ((USItype) (u)),                                     \
530              "dI" ((USItype) (v)));                                     \
531   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
532 #define __umulsidi3(u, v) \
533   ({UDItype __w;                                                        \
534     __asm__ ("emul      %2,%1,%0"                                       \
535              : "=d" (__w)                                               \
536              : "%dI" ((USItype) (u)),                                   \
537                "dI" ((USItype) (v)));                                   \
538     __w; })
539 #endif /* __i960__ */
540
541 #if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   sub_ddmmss(sh, sl, ah, al, bh, bl) computes the two-word difference
   (ah,al) - (bh,bl) and stores it in (sh,sl); a borrow out of the high
   word is discarded.  Each input operand is read exactly once into a
   UWtype temporary before any output is written, so arguments with side
   effects are evaluated a single time.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __sub_ah = (ah), __sub_al = (al);                            \
    UWtype __sub_bh = (bh), __sub_bl = (bl);                            \
    /* A borrow is needed exactly when the low minuend is smaller.  */  \
    if (__sub_al < __sub_bl)                                            \
      (sh) = __sub_ah - __sub_bh - 1;                                   \
    else                                                                \
      (sh) = __sub_ah - __sub_bh;                                       \
    (sl) = __sub_al - __sub_bl;                                         \
  } while (0)
556
557 /* Do both product parts in assembly, since that gives better code with
558    all gcc versions.  Some callers will just use the upper part, and in
559    that situation we waste an instruction, but not any cycles.  */
560 #define umul_ppmm(ph, pl, m0, m1)                                       \
561   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
562            : "=&f" (ph), "=f" (pl)                                      \
563            : "f" (m0), "f" (m1))
/* count_leading_zeros (count, x): set COUNT to W_TYPE_SIZE - 1 - _c,
   where _c is built up as the bit index of the most significant set bit
   of X.  The mux1/czx1.l pair yields _a, from which a byte-granular
   shift (_a - 1) * 8 is derived; X is shifted right by that amount and
   the remaining position is narrowed by the compares against 1 << 4 and
   1 << 2 and the final _x >> 1.  X is evaluated once.
   NOTE(review): there is no special case for X == 0 here -- presumably
   callers must not pass 0; confirm.  */
564 #define count_leading_zeros(count, x)                                   \
565   do {                                                                  \
566     UWtype _x = (x), _y, _a, _c;                                        \
567     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
568     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
569     _c = (_a - 1) << 3;                                                 \
570     _x >>= _c;                                                          \
571     if (_x >= 1 << 4)                                                   \
572       _x >>= 4, _c += 4;                                                \
573     if (_x >= 1 << 2)                                                   \
574       _x >>= 2, _c += 2;                                                \
575     _c += _x >> 1;                                                      \
576     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
577   } while (0)
578 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
579    based, and we don't need a special case for x==0 here */
/* count_trailing_zeros (count, x): COUNT receives the population count
   of (X - 1) & ~X, i.e. of the mask made of the bits below the lowest
   set bit of X.  X is copied into __ctz_x, so it is evaluated once.
   UMUL_TIME is the multiply cost estimate for this target (units are
   not stated here).  */
580 #define count_trailing_zeros(count, x)                                  \
581   do {                                                                  \
582     UWtype __ctz_x = (x);                                               \
583     __asm__ ("popcnt %0 = %1"                                           \
584              : "=r" (count)                                             \
585              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
586   } while (0)
587 #define UMUL_TIME 14
588 #endif
589
590 #if defined (__M32R__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  After the cmp, the low words are added
   with addx and then the high words with a second addx.  AH and AL are
   tied to the SH and SL output registers ("0" and "1"), SL is
   early-clobber, and "cbit" is declared clobbered.  */
591 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
592   /* The cmp clears the condition bit.  */ \
593   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
594            : "=r" ((USItype) (sh)),                                     \
595              "=&r" ((USItype) (sl))                                     \
596            : "0" ((USItype) (ah)),                                      \
597              "r" ((USItype) (bh)),                                      \
598              "1" ((USItype) (al)),                                      \
599              "r" ((USItype) (bl))                                       \
600            : "cbit")
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  After the cmp, BL is subtracted from
   the low word with subx and BH from the high word with a second subx.
   AH and AL are tied to the SH and SL output registers, SL is
   early-clobber, and "cbit" is declared clobbered.  */
601 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
602   /* The cmp clears the condition bit.  */ \
603   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
604            : "=r" ((USItype) (sh)),                                     \
605              "=&r" ((USItype) (sl))                                     \
606            : "0" ((USItype) (ah)),                                      \
607              "r" ((USItype) (bh)),                                      \
608              "1" ((USItype) (al)),                                      \
609              "r" ((USItype) (bl))                                       \
610            : "cbit")
611 #endif /* __M32R__ */
612
613 #if defined (__mc68000__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  BL is added to the low word with add,
   then BH to the high word with addx.  AH and AL are tied to the
   SH and SL data registers and marked commutative ("%"); BL may be any
   general operand ("g"), BH must be in a data register.  */
614 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
615   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
616            : "=d" ((USItype) (sh)),                                     \
617              "=&d" ((USItype) (sl))                                     \
618            : "%0" ((USItype) (ah)),                                     \
619              "d" ((USItype) (bh)),                                      \
620              "%1" ((USItype) (al)),                                     \
621              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  BL is subtracted from the low word with
   sub, then BH from the high word with subx.  AH and AL are tied to the
   SH and SL data registers; unlike the addition they are not marked
   commutative.  */
622 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
623   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
624            : "=d" ((USItype) (sh)),                                     \
625              "=&d" ((USItype) (sl))                                     \
626            : "0" ((USItype) (ah)),                                      \
627              "d" ((USItype) (bh)),                                      \
628              "1" ((USItype) (al)),                                      \
629              "g" ((USItype) (bl)))
630
631 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
632 #if (defined (__mc68020__) && !defined (__mc68060__))
/* umul_ppmm (w1, w0, u, v): multiply U by V with a single mulu.l whose
   destination is the register pair W1:W0.  U is tied to the W0 register
   and marked commutative; V may be a data register, memory or an
   immediate ("dmi").  UMUL_TIME is the multiply cost estimate for this
   variant.  */
633 #define umul_ppmm(w1, w0, u, v) \
634   __asm__ ("mulu%.l %3,%1:%0"                                           \
635            : "=d" ((USItype) (w0)),                                     \
636              "=d" ((USItype) (w1))                                      \
637            : "%0" ((USItype) (u)),                                      \
638              "dmi" ((USItype) (v)))
639 #define UMUL_TIME 45
/* udiv_qrnnd (q, r, n1, n0, d): divide the two-word value held in the
   register pair N1:N0 by D with a single divu.l.  N0 shares its
   register with the Q output and N1 with the R output.  UDIV_TIME is
   the divide cost estimate for this variant.  */
640 #define udiv_qrnnd(q, r, n1, n0, d) \
641   __asm__ ("divu%.l %4,%1:%0"                                           \
642            : "=d" ((USItype) (q)),                                      \
643              "=d" ((USItype) (r))                                       \
644            : "0" ((USItype) (n0)),                                      \
645              "1" ((USItype) (n1)),                                      \
646              "dmi" ((USItype) (d)))
647 #define UDIV_TIME 90
/* sdiv_qrnnd (q, r, n1, n0, d): same operand layout as udiv_qrnnd, but
   using the divs.l instruction: N1:N0 is divided by D, N0 shares its
   register with Q and N1 with R.  Note that the operands are still cast
   to USItype.  */
648 #define sdiv_qrnnd(q, r, n1, n0, d) \
649   __asm__ ("divs%.l %4,%1:%0"                                           \
650            : "=d" ((USItype) (q)),                                      \
651              "=d" ((USItype) (r))                                       \
652            : "0" ((USItype) (n0)),                                      \
653              "1" ((USItype) (n1)),                                      \
654              "dmi" ((USItype) (d)))
655
656 #elif defined (__mcoldfire__) /* not mc68020 */
657
/* umul_ppmm (xh, xl, a, b) for ColdFire: builds the product of A and B
   from four 16-bit mulu partial products, using d0-d4 as scratch
   registers (all five are declared clobbered).  A and B are first
   copied to d0/d1 and split with swap; the cross products are summed
   into d2, and a carry out of that sum adds 65536 to the high partial
   product in d1.  The low word is stored to XL before the high word is
   stored to XH.  The low half of d0 is zeroed with clr.w in this
   variant.  UMUL_TIME and UDIV_TIME are the cost estimates used when
   this variant is selected.  */
658 #define umul_ppmm(xh, xl, a, b) \
659   __asm__ ("| Inlined umul_ppmm\n"                                      \
660            "    move%.l %2,%/d0\n"                                      \
661            "    move%.l %3,%/d1\n"                                      \
662            "    move%.l %/d0,%/d2\n"                                    \
663            "    swap    %/d0\n"                                         \
664            "    move%.l %/d1,%/d3\n"                                    \
665            "    swap    %/d1\n"                                         \
666            "    move%.w %/d2,%/d4\n"                                    \
667            "    mulu    %/d3,%/d4\n"                                    \
668            "    mulu    %/d1,%/d2\n"                                    \
669            "    mulu    %/d0,%/d3\n"                                    \
670            "    mulu    %/d0,%/d1\n"                                    \
671            "    move%.l %/d4,%/d0\n"                                    \
672            "    clr%.w  %/d0\n"                                         \
673            "    swap    %/d0\n"                                         \
674            "    add%.l  %/d0,%/d2\n"                                    \
675            "    add%.l  %/d3,%/d2\n"                                    \
676            "    jcc     1f\n"                                           \
677            "    add%.l  %#65536,%/d1\n"                                 \
678            "1:  swap    %/d2\n"                                         \
679            "    moveq   %#0,%/d0\n"                                     \
680            "    move%.w %/d2,%/d0\n"                                    \
681            "    move%.w %/d4,%/d2\n"                                    \
682            "    move%.l %/d2,%1\n"                                      \
683            "    add%.l  %/d1,%/d0\n"                                    \
684            "    move%.l %/d0,%0"                                        \
685            : "=g" ((USItype) (xh)),                                     \
686              "=g" ((USItype) (xl))                                      \
687            : "g" ((USItype) (a)),                                       \
688              "g" ((USItype) (b))                                        \
689            : "d0", "d1", "d2", "d3", "d4")
690 #define UMUL_TIME 100
691 #define UDIV_TIME 400
692 #else /* not ColdFire */
693 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* umul_ppmm (xh, xl, a, b) for m68k parts that are neither mc68020 nor
   ColdFire: builds the product of A and B from four 16-bit mulu
   partial products, using d0-d4 as scratch registers (all five are
   declared clobbered).  A and B are first copied to d0/d1 and split
   with swap; the cross products are summed into d2, and a carry out of
   that sum adds 65536 to the high partial product in d1.  The low word
   is stored to XL before the high word is stored to XH.  The low half
   of d0 is zeroed with eor.w in this variant.  UMUL_TIME and UDIV_TIME
   are the cost estimates used when this variant is selected.  */
694 #define umul_ppmm(xh, xl, a, b) \
695   __asm__ ("| Inlined umul_ppmm\n"                                      \
696            "    move%.l %2,%/d0\n"                                      \
697            "    move%.l %3,%/d1\n"                                      \
698            "    move%.l %/d0,%/d2\n"                                    \
699            "    swap    %/d0\n"                                         \
700            "    move%.l %/d1,%/d3\n"                                    \
701            "    swap    %/d1\n"                                         \
702            "    move%.w %/d2,%/d4\n"                                    \
703            "    mulu    %/d3,%/d4\n"                                    \
704            "    mulu    %/d1,%/d2\n"                                    \
705            "    mulu    %/d0,%/d3\n"                                    \
706            "    mulu    %/d0,%/d1\n"                                    \
707            "    move%.l %/d4,%/d0\n"                                    \
708            "    eor%.w  %/d0,%/d0\n"                                    \
709            "    swap    %/d0\n"                                         \
710            "    add%.l  %/d0,%/d2\n"                                    \
711            "    add%.l  %/d3,%/d2\n"                                    \
712            "    jcc     1f\n"                                           \
713            "    add%.l  %#65536,%/d1\n"                                 \
714            "1:  swap    %/d2\n"                                         \
715            "    moveq   %#0,%/d0\n"                                     \
716            "    move%.w %/d2,%/d0\n"                                    \
717            "    move%.w %/d4,%/d2\n"                                    \
718            "    move%.l %/d2,%1\n"                                      \
719            "    add%.l  %/d1,%/d0\n"                                    \
720            "    move%.l %/d0,%0"                                        \
721            : "=g" ((USItype) (xh)),                                     \
722              "=g" ((USItype) (xl))                                      \
723            : "g" ((USItype) (a)),                                       \
724              "g" ((USItype) (b))                                        \
725            : "d0", "d1", "d2", "d3", "d4")
726 #define UMUL_TIME 100
727 #define UDIV_TIME 400
728
729 #endif /* not mc68020 */
730
731 /* The '020, '030, '040 and '060 have bitfield insns.
732    cpu32 disguises as a 68020, but lacks them.  */
733 #if defined (__mc68020__) && !defined (__mcpu32__)
/* count_leading_zeros (count, x): applies the bfffo bit-field
   instruction to X and stores its result in COUNT.  Both the offset and
   the width fields of the bit-field specifier are printed from the
   same constant operand, 0.
   NOTE(review): a width of 0 presumably encodes the full 32-bit field
   -- confirm against the 68020 instruction set reference.  */
734 #define count_leading_zeros(count, x) \
735   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
736            : "=d" ((USItype) (count))                                   \
737            : "od" ((USItype) (x)), "n" (0))
738 /* Some ColdFire architectures have a ff1 instruction supported via
739    __builtin_clz. */
740 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
/* count_leading_zeros (count, x): assigns __builtin_clz (X) to COUNT.
   NOTE(review): COUNT_LEADING_ZEROS_0 is presumably the value produced
   for X == 0; since GCC documents __builtin_clz (0) as undefined,
   confirm that the target instruction really yields 32 in that case.  */
741 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
742 #define COUNT_LEADING_ZEROS_0 32
743 #endif
744 #endif /* mc68000 */
745
746 #if defined (__m88000__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  The low words are added with addu.co
   and the high words with addu.ci.  All four inputs accept the "rJ"
   constraint and are printed with the %r modifier; AH and AL are marked
   commutative, and SL is early-clobber.  */
747 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
748   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
749            : "=r" ((USItype) (sh)),                                     \
750              "=&r" ((USItype) (sl))                                     \
751            : "%rJ" ((USItype) (ah)),                                    \
752              "rJ" ((USItype) (bh)),                                     \
753              "%rJ" ((USItype) (al)),                                    \
754              "rJ" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  The low words are subtracted with
   subu.co and the high words with subu.ci.  All four inputs accept the
   "rJ" constraint and are printed with the %r modifier; SL is
   early-clobber.  */
755 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
756   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
757            : "=r" ((USItype) (sh)),                                     \
758              "=&r" ((USItype) (sl))                                     \
759            : "rJ" ((USItype) (ah)),                                     \
760              "rJ" ((USItype) (bh)),                                     \
761              "rJ" ((USItype) (al)),                                     \
762              "rJ" ((USItype) (bl)))
/* count_leading_zeros (count, x): runs ff1 on X and stores the result
   XOR 31 in COUNT, which turns a bit index counted from the least
   significant end into a count of leading zero bits.
   NOTE(review): COUNT_LEADING_ZEROS_0 is 63 (marked "sic" by the
   original author) -- presumably ff1 yields 32 for X == 0 and
   32 ^ 31 == 63; confirm.  */
763 #define count_leading_zeros(count, x) \
764   do {                                                                  \
765     USItype __cbtmp;                                                    \
766     __asm__ ("ff1 %0,%1"                                                \
767              : "=r" (__cbtmp)                                           \
768              : "r" ((USItype) (x)));                                    \
769     (count) = __cbtmp ^ 31;                                             \
770   } while (0)
771 #define COUNT_LEADING_ZEROS_0 63 /* sic */
772 #if defined (__mc88110__)
/* umul_ppmm (wh, wl, u, v): multiply U by V with mulu.d, which writes a
   UDItype result into __xx.__ll.  The result is then split through the
   union: the first USItype member (__h) goes to WH and the second
   (__l) to WL, so the macro relies on the high word being stored
   first in memory.  */
773 #define umul_ppmm(wh, wl, u, v) \
774   do {                                                                  \
775     union {UDItype __ll;                                                \
776            struct {USItype __h, __l;} __i;                              \
777           } __xx;                                                       \
778     __asm__ ("mulu.d    %0,%1,%2"                                       \
779              : "=r" (__xx.__ll)                                         \
780              : "r" ((USItype) (u)),                                     \
781                "r" ((USItype) (v)));                                    \
782     (wh) = __xx.__i.__h;                                                \
783     (wl) = __xx.__i.__l;                                                \
784   } while (0)
/* udiv_qrnnd (q, r, n1, n0, d): divide the two-word value (N1,N0) by D
   with divu.d.  The quotient is stored in Q and the remainder,
   recomputed as N0 - Q * D, in R.

   N0 and D are copied into USItype locals first, so each macro argument
   is evaluated exactly once even though both values are needed again
   for the remainder.  The dividend is assembled through a union whose
   first USItype member is the high word.  UMUL_TIME and UDIV_TIME are
   the cost estimates for the mc88110.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __xx;                                                       \
  USItype __q, __n0 = (n0), __d = (d);                                  \
  __xx.__i.__h = (n1); __xx.__i.__l = __n0;                             \
  __asm__ ("divu.d %0,%1,%2"                                            \
           : "=r" (__q)                                                 \
           : "r" (__xx.__ll),                                           \
             "r" (__d));                                                \
  (r) = __n0 - __q * __d; (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
798 #else
/* Multiply and divide cost estimates for m88000 parts other than the
   mc88110 (this is the #else arm of the __mc88110__ test).  */
799 #define UMUL_TIME 17
800 #define UDIV_TIME 150
801 #endif /* __mc88110__ */
802 #endif /* __m88000__ */
803
804 #if defined (__mn10300__)
805 # if defined (__AM33__)
/* AM33 variants.  count_leading_zeros assigns __builtin_clz (X) to
   COUNT.  umul_ppmm and smul_ppmm issue a single four-operand mulu/mul
   whose outputs are W1 and W0.  The asm statements are spelled __asm__,
   like the rest of this file, so they still compile when the plain
   "asm" keyword is disabled (-ansi, -std=c99 and similar).  */
#  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
811 # else
/* Non-AM33 variants.  umul_ppmm and smul_ppmm issue a two-operand
   mulu/mul preceded by two nops; U is tied to the W0 data register and
   W1 is taken from the register selected by the "z" constraint.  The
   asm statements are spelled __asm__, like the rest of this file, so
   they still compile when the plain "asm" keyword is disabled (-ansi,
   -std=c99 and similar).  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
816 # endif
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL), done by packing each operand into a
   DWunion, adding the double-word views and unpacking the sum.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __lhs;                              \
    DWunion __rhs;                              \
    DWunion __sum;                              \
    __lhs.s.low = (al);                         \
    __lhs.s.high = (ah);                        \
    __rhs.s.low = (bl);                         \
    __rhs.s.high = (bh);                        \
    __sum.ll = __lhs.ll + __rhs.ll;             \
    (sl) = __sum.s.low;                         \
    (sh) = __sum.s.high;                        \
  } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL), done by packing each operand into a
   DWunion, subtracting the double-word views and unpacking the
   difference.  */
# define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __lhs;                              \
    DWunion __rhs;                              \
    DWunion __diff;                             \
    __lhs.s.low = (al);                         \
    __lhs.s.high = (ah);                        \
    __rhs.s.low = (bl);                         \
    __rhs.s.high = (bh);                        \
    __diff.ll = __lhs.ll - __rhs.ll;            \
    (sl) = __diff.s.low;                        \
    (sh) = __diff.s.high;                       \
  } while (0)
/* udiv_qrnnd / sdiv_qrnnd (q, r, nh, nl, d): divide (NH,NL) by D with a
   single divu/div.  NL shares its register with the Q output and NH
   with the R output (the register selected by the "z" constraint).  The
   asm statements are spelled __asm__, like the rest of this file, so
   they still compile when the plain "asm" keyword is disabled (-ansi,
   -std=c99 and similar).  UMUL_TIME and UDIV_TIME are the cost
   estimates for this target.  */
# define udiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
839 #endif
840
841 #if defined (__mips__) && W_TYPE_SIZE == 32
/* umul_ppmm (w1, w0, u, v): unsigned 32x32->64 multiply written in
   plain C.  U and V are each truncated to USItype, the product is
   formed in a UDItype temporary, and its upper and lower 32 bits are
   stored to W1 and W0 respectively.  UMUL_TIME and UDIV_TIME are the
   cost estimates for this target.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __prod = (UDItype) (USItype) (u);                           \
    __prod *= (USItype) (v);                                            \
    (w1) = (USItype) (__prod >> 32);                                    \
    (w0) = (USItype) (__prod);                                          \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100
850
851 #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
/* count_leading_zeros (COUNT, X): assigns __builtin_clz (X) to COUNT;
   only defined when __mips is 32 or 64 and __mips16 is not defined.
   NOTE(review): COUNT_LEADING_ZEROS_0 is presumably the value produced
   for X == 0; since GCC documents __builtin_clz (0) as undefined,
   confirm that the target instruction really yields 32 in that case.  */
852 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
853 #define COUNT_LEADING_ZEROS_0 32
854 #endif
855 #endif /* __mips__ */
856
857 #if defined (__ns32000__) && W_TYPE_SIZE == 32
/* umul_ppmm (w1, w0, u, v): multiply U by V with meid, which writes a
   UDItype result into __xx.__ll (U is tied to that output operand and
   marked commutative).  The result is split through the union: the
   first USItype member (__l) goes to W0 and the second (__h) to W1, so
   the macro relies on the low word being stored first in memory.  */
858 #define umul_ppmm(w1, w0, u, v) \
859   ({union {UDItype __ll;                                                \
860            struct {USItype __l, __h;} __i;                              \
861           } __xx;                                                       \
862   __asm__ ("meid %2,%0"                                                 \
863            : "=g" (__xx.__ll)                                           \
864            : "%0" ((USItype) (u)),                                      \
865              "g" ((USItype) (v)));                                      \
866   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* __umulsidi3 (u, v): statement expression whose value is the UDItype
   result of a single meid on U and V (both cast to USItype; U is tied
   to the output operand and marked commutative).  */
867 #define __umulsidi3(u, v) \
868   ({UDItype __w;                                                        \
869     __asm__ ("meid %2,%0"                                               \
870              : "=g" (__w)                                               \
871              : "%0" ((USItype) (u)),                                    \
872                "g" ((USItype) (v)));                                    \
873     __w; })
/* udiv_qrnnd (q, r, n1, n0, d): packs N1 (high) and N0 (low) into a
   UDItype through the union, runs deid with D on it in place, and then
   reads the remainder R from the low member (__l) and the quotient Q
   from the high member (__h) of the same union.  */
874 #define udiv_qrnnd(q, r, n1, n0, d) \
875   ({union {UDItype __ll;                                                \
876            struct {USItype __l, __h;} __i;                              \
877           } __xx;                                                       \
878   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
879   __asm__ ("deid %2,%0"                                                 \
880            : "=g" (__xx.__ll)                                           \
881            : "0" (__xx.__ll),                                           \
882              "g" ((USItype) (d)));                                      \
883   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
/* count_trailing_zeros (count, x): runs ffsd with X as the source and
   COUNT as the in/out operand; COUNT is pre-loaded with 0 through the
   matching "0" constraint before the instruction executes.  */
884 #define count_trailing_zeros(count,x) \
885   do {                                                                  \
886     __asm__ ("ffsd     %2,%0"                                           \
887             : "=r" ((USItype) (count))                                  \
888             : "0" ((USItype) 0),                                        \
889               "r" ((USItype) (x)));                                     \
890   } while (0)
891 #endif /* __ns32000__ */
892
893 /* FIXME: We should test _IBMR2 here when we add assembly support for the
894    system vendor compilers.
895    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
896    enough, since that hits ARM and m68k too.  */
897 #if (defined (_ARCH_PPC)        /* AIX */                               \
898      || defined (__powerpc__)   /* gcc */                               \
899      || defined (__POWERPC__)   /* BEOS */                              \
900      || defined (__ppc__)       /* Darwin */                            \
901      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
902      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
903          && CPU_FAMILY == PPC)                                                \
904      ) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  The low words are added with a
   carrying add (BL may be a register or an "I" immediate) and the high
   words with an instruction that consumes the carry.  Three forms are
   selected at compile time:
     - BH is the constant 0:         addze on AH;
     - BH is the constant ~0:        addme on AH;
     - otherwise:                    adde on AH and BH.
   In the two constant cases BH is not passed to the asm at all.  */
905 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
906   do {                                                                  \
907     if (__builtin_constant_p (bh) && (bh) == 0)                         \
908       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
909              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
910     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
911       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
912              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
913     else                                                                \
914       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
915              : "=r" (sh), "=&r" (sl)                                    \
916              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
917   } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  The low words are subtracted with a
   carrying subtract-from (AL may be a register or an "I" immediate) and
   the high word is produced by an instruction that consumes the carry.
   Five forms are selected at compile time:
     - AH is the constant 0:         subfze on BH;
     - AH is the constant ~0:        subfme on BH;
     - BH is the constant 0:         addme on AH;
     - BH is the constant ~0:        addze on AH;
     - otherwise:                    subfe on BH and AH.
   In the four constant cases the constant operand is not passed to the
   asm at all.  */
918 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
919   do {                                                                  \
920     if (__builtin_constant_p (ah) && (ah) == 0)                         \
921       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
922                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
923     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
924       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
925                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
926     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
927       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
928                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
929     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
930       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
931                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
932     else                                                                \
933       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
934                : "=r" (sh), "=&r" (sl)                                  \
935                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
936   } while (0)
/* count_leading_zeros (count, x): stores the result of cntlzw on X in
   COUNT.  NOTE(review): COUNT_LEADING_ZEROS_0 is presumably the value
   the instruction produces for X == 0 -- confirm.  */
937 #define count_leading_zeros(count, x) \
938   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
939 #define COUNT_LEADING_ZEROS_0 32
940 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
941   || defined (__ppc__)                                                    \
942   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
943   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
944          && CPU_FAMILY == PPC)
/* umul_ppmm (ph, pl, m0, m1): unsigned 32x32->64 multiply.  PH receives
   the result of mulhwu and PL the result of an ordinary C
   multiplication.  Both are computed from the USItype copies
   __m0/__m1, so M0 and M1 are each evaluated exactly once and are
   converted to USItype before they reach the asm operands.  UMUL_TIME
   is the multiply cost estimate for this target.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed 32x32->64 multiply.  PH receives
   the result of mulhw and PL the result of an ordinary C
   multiplication.  Both are computed from the SItype copies
   __m0/__m1, so M0 and M1 are each evaluated exactly once and are
   converted to SItype before they reach the asm operands.  SMUL_TIME
   and UDIV_TIME are cost estimates for this target.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
960 #endif
961 #endif /* 32-bit POWER architecture variants.  */
962
963 /* We should test _IBMR2 here when we add assembly support for the system
964    vendor compilers.  */
965 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL) for 64-bit words.  The low words are
   added with a carrying add (BL may be a register or an "I" immediate)
   and the high words with an instruction that consumes the carry.
   Three forms are selected at compile time:
     - BH is the constant 0:         addze on AH;
     - BH is the constant ~0:        addme on AH;
     - otherwise:                    adde on AH and BH.
   In the two constant cases BH is not passed to the asm at all.  */
966 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
967   do {                                                                  \
968     if (__builtin_constant_p (bh) && (bh) == 0)                         \
969       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
970              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
971     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
972       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
973              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
974     else                                                                \
975       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
976              : "=r" (sh), "=&r" (sl)                                    \
977              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
978   } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL) for 64-bit words.  The low words are
   subtracted with a carrying subtract-from (AL may be a register or an
   "I" immediate) and the high word is produced by an instruction that
   consumes the carry.  Five forms are selected at compile time:
     - AH is the constant 0:         subfze on BH;
     - AH is the constant ~0:        subfme on BH;
     - BH is the constant 0:         addme on AH;
     - BH is the constant ~0:        addze on AH;
     - otherwise:                    subfe on BH and AH.
   In the four constant cases the constant operand is not passed to the
   asm at all.  */
979 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
980   do {                                                                  \
981     if (__builtin_constant_p (ah) && (ah) == 0)                         \
982       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
983                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
984     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
985       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
986                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
987     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
988       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
989                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
990     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
991       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
992                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
993     else                                                                \
994       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
995                : "=r" (sh), "=&r" (sl)                                  \
996                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
997   } while (0)
/* count_leading_zeros (count, x): stores the result of cntlzd on X in
   COUNT.  NOTE(review): COUNT_LEADING_ZEROS_0 is presumably the value
   the instruction produces for X == 0 -- confirm.  */
998 #define count_leading_zeros(count, x) \
999   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
1000 #define COUNT_LEADING_ZEROS_0 64
/* umul_ppmm (ph, pl, m0, m1): unsigned 64x64->128 multiply.  PH
   receives the result of mulhdu and PL the result of an ordinary C
   multiplication.  Both are computed from the UDItype copies
   __m0/__m1, so M0 and M1 are each evaluated exactly once and are
   converted to UDItype before they reach the asm operands.  UMUL_TIME
   is the multiply cost estimate for this target.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed 64x64->128 multiply.  PH receives
   the result of mulhd and PL the result of an ordinary C
   multiplication.  Both are computed from the DItype copies
   __m0/__m1, so M0 and M1 are each evaluated exactly once and are
   converted to DItype before they reach the asm operands.  SMUL_TIME
   and UDIV_TIME are cost estimates that the original author marked as
   uncertain.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
1016 #endif /* 64-bit PowerPC.  */
1017
1018 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  BL is added to the low word with "a"
   and BH to the high word with "ae".  AH and AL are tied to the SH and
   SL output registers and marked commutative; SL is early-clobber.  */
1019 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1020   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
1021            : "=r" ((USItype) (sh)),                                     \
1022              "=&r" ((USItype) (sl))                                     \
1023            : "%0" ((USItype) (ah)),                                     \
1024              "r" ((USItype) (bh)),                                      \
1025              "%1" ((USItype) (al)),                                     \
1026              "r" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  BL is subtracted from the low word with
   "s" and BH from the high word with "se".  AH and AL are tied to the
   SH and SL output registers; SL is early-clobber.  */
1027 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1028   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
1029            : "=r" ((USItype) (sh)),                                     \
1030              "=&r" ((USItype) (sl))                                     \
1031            : "0" ((USItype) (ah)),                                      \
1032              "r" ((USItype) (bh)),                                      \
1033              "1" ((USItype) (al)),                                      \
1034              "r" ((USItype) (bl)))
/* umul_ppmm (ph, pl, m0, m1): multiply M0 by M1 using r2 as the
   accumulator (cleared with "s r2,r2" and declared clobbered) and r10
   as the special register loaded from M0 with mts.  Sixteen "m r2,%3"
   steps are issued; afterwards PH is copied from r2 with cas and PL is
   read back from r10 with mfs.

   The final C statement adds M1 to PH when the top bit of M0 is set and
   M0 to PH when the top bit of M1 is set (each arithmetic shift by 31
   yields an all-ones or all-zero mask).  NOTE(review): this is
   presumably the correction that turns the signed high word produced
   by the multiply steps into the unsigned one -- confirm.  M0 and M1
   are copied into locals, so each is evaluated once.  UMUL_TIME and
   UDIV_TIME are the cost estimates for this target.  */
1035 #define umul_ppmm(ph, pl, m0, m1) \
1036   do {                                                                  \
1037     USItype __m0 = (m0), __m1 = (m1);                                   \
1038     __asm__ (                                                           \
1039        "s       r2,r2\n"                                                \
1040 "       mts     r10,%2\n"                                               \
1041 "       m       r2,%3\n"                                                \
1042 "       m       r2,%3\n"                                                \
1043 "       m       r2,%3\n"                                                \
1044 "       m       r2,%3\n"                                                \
1045 "       m       r2,%3\n"                                                \
1046 "       m       r2,%3\n"                                                \
1047 "       m       r2,%3\n"                                                \
1048 "       m       r2,%3\n"                                                \
1049 "       m       r2,%3\n"                                                \
1050 "       m       r2,%3\n"                                                \
1051 "       m       r2,%3\n"                                                \
1052 "       m       r2,%3\n"                                                \
1053 "       m       r2,%3\n"                                                \
1054 "       m       r2,%3\n"                                                \
1055 "       m       r2,%3\n"                                                \
1056 "       m       r2,%3\n"                                                \
1057 "       cas     %0,r2,r0\n"                                             \
1058 "       mfs     r10,%1"                                                 \
1059              : "=r" ((USItype) (ph)),                                   \
1060                "=r" ((USItype) (pl))                                    \
1061              : "%r" (__m0),                                             \
1062                 "r" (__m1)                                              \
1063              : "r2");                                                   \
1064     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1065              + (((SItype) __m1 >> 31) & __m0));                         \
1066   } while (0)
1067 #define UMUL_TIME 20
1068 #define UDIV_TIME 200
/* count_leading_zeros (count, x): when X is at least 0x10000, the clz
   instruction is applied to X >> 16 and its result stored in COUNT;
   otherwise clz is applied to X itself and 16 is added to the result.

   X is copied into a USItype local first, so the argument is evaluated
   exactly once and the 0x10000 threshold test is always an unsigned
   comparison: a signed argument with its top bit set therefore takes
   the upper-half branch instead of being treated as a small value.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __clz_x = (x);                                              \
    if (__clz_x >= 0x10000)                                             \
      __asm__ ("clz     %0,%1"                                          \
               : "=r" ((USItype) (count))                               \
               : "r" (__clz_x >> 16));                                  \
    else                                                                \
      {                                                                 \
        __asm__ ("clz   %0,%1"                                          \
                 : "=r" ((USItype) (count))                             \
                 : "r" (__clz_x));                                      \
        (count) += 16;                                                  \
      }                                                                 \
  } while (0)
1083 #endif
1084
1085 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1086 #ifndef __sh1__
/* umul_ppmm (w1, w0, u, v): multiply U by V with dmulu.l, then copy
   macl into W0 and mach into W1 with sts.  Both outputs accept a
   register or a pre-decrement memory operand ("r<"), with the %M
   modifier adjusting the sts mnemonic accordingly; "macl" and "mach"
   are declared clobbered.  Only defined when __sh1__ is not.
   UMUL_TIME is the multiply cost estimate for this target.  */
1087 #define umul_ppmm(w1, w0, u, v) \
1088   __asm__ (                                                             \
1089        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1090            : "=r<" ((USItype)(w1)),                                     \
1091              "=r<" ((USItype)(w0))                                      \
1092            : "r" ((USItype)(u)),                                        \
1093              "r" ((USItype)(v))                                         \
1094            : "macl", "mach")
1095 #define UMUL_TIME 5
1096 #endif
1097
1098 /* This is the same algorithm as __udiv_qrnnd_c.  */
1099 #define UDIV_NEEDS_NORMALIZATION 1
1100
/* udiv_qrnnd (q, r, n1, n0, d): division built from two calls ("jsr") to
   the out-of-line helper __udiv_qrnnd_16, which is declared locally with
   hidden visibility.  The asm uses a fixed register protocol (see the
   inline register comment): d is moved to r5, n0 is word-swapped into r4,
   and the two partial results left in r1 are combined into q with
   swap.w/or.  n1 is tied to the register of r (matching constraint "1"),
   which is early-clobbered.  r1, r2, r4, r5, r6, pr and the T bit are
   declared clobbered.
   NOTE(review): the instruction following each "jsr" presumably executes
   in the branch delay slot -- confirm before reordering anything here.  */
1101 #define udiv_qrnnd(q, r, n1, n0, d) \
1102   do {                                                                  \
1103     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1104                         __attribute__ ((visibility ("hidden")));        \
1105     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1106     __asm__ (                                                           \
1107         "mov%M4 %4,r5\n"                                                \
1108 "       swap.w %3,r4\n"                                                 \
1109 "       swap.w r5,r6\n"                                                 \
1110 "       jsr @%5\n"                                                      \
1111 "       shll16 r6\n"                                                    \
1112 "       swap.w r4,r4\n"                                                 \
1113 "       jsr @%5\n"                                                      \
1114 "       swap.w r1,%0\n"                                                 \
1115 "       or r1,%0"                                                       \
1116         : "=r" (q), "=&z" (r)                                           \
1117         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1118         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1119   } while (0)
1120
1121 #define UDIV_TIME 80
1122
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction.  "clrt"
   clears the T bit, then "subc" subtracts bl from the low word and a
   second "subc" subtracts bh from the high word.  ah and al are tied to
   the output registers through the matching constraints "0" and "1", so
   the subtraction happens in place; the T bit is declared clobbered.  */
1123 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1124   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1125            : "=r" (sh), "=r" (sl)                                       \
1126            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1127
1128 #endif /* __sh__ */
1129
1130 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* __umulsidi3 (u, v): widening unsigned multiply.  Each argument is
   converted to USItype; the first is then widened to UDItype so that the
   multiplication is carried out in UDItype.  The arguments are
   parenthesized so that a compound expression such as a + b is cast and
   multiplied as a whole rather than term by term.  */
#define __umulsidi3(u, v) ((UDItype) (USItype) (u) * (USItype) (v))
/* count_leading_zeros (count, x): x is converted to USItype and
   zero-extended into a UDItype temporary, the "nsb" instruction is run on
   it, and the signed result minus 31 is stored in count.
   NOTE(review): the "- 31" presumably rebases nsb's result for the 64-bit
   temporary to a 32-bit leading-zero count -- confirm against the SHmedia
   instruction reference.  */
1132 #define count_leading_zeros(count, x) \
1133   do                                                                    \
1134     {                                                                   \
1135       UDItype x_ = (USItype)(x);                                        \
1136       SItype c_;                                                        \
1137                                                                         \
1138       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1139       (count) = c_ - 31;                                                \
1140     }                                                                   \
1141   while (0)
1142 #define COUNT_LEADING_ZEROS_0 32
1143 #endif
1144
1145 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1146     && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition.  "addcc" adds
   the low words al and bl into sl and sets the condition codes; "addx"
   then adds the high words ah and bh into sh.  sl is early-clobber
   ("=&r") because it is written before ah and bh are read.  The "%"
   modifier marks each (a, b) pair as commutative.  __CLOBBER_CC is
   defined outside this section.  */
1147 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1148   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1149            : "=r" ((USItype) (sh)),                                     \
1150              "=&r" ((USItype) (sl))                                     \
1151            : "%rJ" ((USItype) (ah)),                                    \
1152              "rI" ((USItype) (bh)),                                     \
1153              "%rJ" ((USItype) (al)),                                    \
1154              "rI" ((USItype) (bl))                                      \
1155            __CLOBBER_CC)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction.  "subcc"
   subtracts bl from al into sl and sets the condition codes; "subx" then
   subtracts bh from ah into sh.  sl is early-clobber ("=&r") because it
   is written before ah and bh are read.  Unlike the addition macro, the
   operands carry no "%" modifier, so they are never exchanged.
   __CLOBBER_CC is defined outside this section.  */
1156 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1157   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1158            : "=r" ((USItype) (sh)),                                     \
1159              "=&r" ((USItype) (sl))                                     \
1160            : "rJ" ((USItype) (ah)),                                     \
1161              "rI" ((USItype) (bh)),                                     \
1162              "rJ" ((USItype) (al)),                                     \
1163              "rI" ((USItype) (bl))                                      \
1164            __CLOBBER_CC)
1165 #if defined (__sparc_v9__)
/* umul_ppmm (w1, w0, u, v) for __sparc_v9__: "umul" writes the product of
   u and v into the register variable __g1 (bound to %g1), "srlx" shifts
   that register right by 32 into w1, and w0 is then assigned from __g1 in
   C, which truncates it to USItype.
   NOTE(review): this relies on umul leaving the full 64-bit product in
   the destination register -- confirm against the SPARC V9 manual.  */
1166 #define umul_ppmm(w1, w0, u, v) \
1167   do {                                                                  \
1168     register USItype __g1 asm ("g1");                                   \
1169     __asm__ ("umul\t%2,%3,%1\n\t"                                       \
1170              "srlx\t%1, 32, %0"                                         \
1171              : "=r" ((USItype) (w1)),                                   \
1172                "=r" (__g1)                                              \
1173              : "r" ((USItype) (u)),                                     \
1174                "r" ((USItype) (v)));                                    \
1175     (w0) = __g1;                                                        \
1176   } while (0)
/* udiv_qrnnd (__q, __r, __n1, __n0, __d) for __sparc_v9__: __n1 is moved
   into %y, "udiv" divides by __d with __n0 as its register operand and
   stores the quotient in __q, and the remainder is rebuilt as
   __n0 - __q * __d using "umul" and "sub".  Both outputs are early-clobber
   because they are written while inputs are still live.
   NOTE(review): the asm writes %y but declares no clobbers -- confirm
   that this is intentional.  */
1177 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1178   __asm__ ("mov\t%2,%%y\n\t"                                            \
1179            "udiv\t%3,%4,%0\n\t"                                         \
1180            "umul\t%0,%4,%1\n\t"                                         \
1181            "sub\t%3,%1,%1"                                              \
1182            : "=&r" ((USItype) (__q)),                                   \
1183              "=&r" ((USItype) (__r))                                    \
1184            : "r" ((USItype) (__n1)),                                    \
1185              "r" ((USItype) (__n0)),                                    \
1186              "r" ((USItype) (__d)))
1187 #else
1188 #if defined (__sparc_v8__)
/* umul_ppmm (w1, w0, u, v) for __sparc_v8__: "umul" multiplies u by v
   into w0, then "rd %y" copies the %y register into w1.
   NOTE(review): presumably %y holds the high word of the product after
   umul -- confirm against the SPARC V8 manual.  */
1189 #define umul_ppmm(w1, w0, u, v) \
1190   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1191            : "=r" ((USItype) (w1)),                                     \
1192              "=r" ((USItype) (w0))                                      \
1193            : "r" ((USItype) (u)),                                       \
1194              "r" ((USItype) (v)))
/* udiv_qrnnd (__q, __r, __n1, __n0, __d) for __sparc_v8__: __n1 is moved
   into %y, followed by three nops, then "udiv" divides by __d with __n0
   as its register operand into __q.  The remainder is rebuilt as
   __n0 - __q * __d with "umul" and "sub".  Both outputs are early-clobber.
   NOTE(review): the three nops presumably cover the delay after writing
   %y -- confirm before removing them.  */
1195 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1196   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1197            : "=&r" ((USItype) (__q)),                                   \
1198              "=&r" ((USItype) (__r))                                    \
1199            : "r" ((USItype) (__n1)),                                    \
1200              "r" ((USItype) (__n0)),                                    \
1201              "r" ((USItype) (__d)))
1202 #else
1203 #if defined (__sparclite__)
1204 /* This has hardware multiply but not divide.  It also has two additional
1205    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (w1, w0, u, v) for __sparclite__: "umul" multiplies u by v
   into w0, then "rd %y" copies the %y register into w1.
   NOTE(review): presumably %y holds the high word of the product after
   umul -- confirm against the SPARClite documentation.  */
1206 #define umul_ppmm(w1, w0, u, v) \
1207   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1208            : "=r" ((USItype) (w1)),                                     \
1209              "=r" ((USItype) (w0))                                      \
1210            : "r" ((USItype) (u)),                                       \
1211              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d) for __sparclite__: n1 is written to %y,
   "tst %g0" sets the condition codes, and then 32 "divscc" steps are
   issued back to back.  The first step reads n0, the intermediate steps
   accumulate in %g1, and the last step writes q.  Afterwards %y is read
   into r, and a "bl,a" branch with "add %1,%4,%1" in its delay slot adds
   d to r on one of the two paths.  %g1 and the condition codes are
   declared clobbered.
   NOTE(review): presumably each divscc produces one quotient bit and the
   final conditional add corrects a negative remainder -- confirm against
   the SPARClite documentation.  */
1212 #define udiv_qrnnd(q, r, n1, n0, d) \
1213   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1214 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1215 "       tst     %%g0\n"                                                 \
1216 "       divscc  %3,%4,%%g1\n"                                           \
1217 "       divscc  %%g1,%4,%%g1\n"                                         \
1218 "       divscc  %%g1,%4,%%g1\n"                                         \
1219 "       divscc  %%g1,%4,%%g1\n"                                         \
1220 "       divscc  %%g1,%4,%%g1\n"                                         \
1221 "       divscc  %%g1,%4,%%g1\n"                                         \
1222 "       divscc  %%g1,%4,%%g1\n"                                         \
1223 "       divscc  %%g1,%4,%%g1\n"                                         \
1224 "       divscc  %%g1,%4,%%g1\n"                                         \
1225 "       divscc  %%g1,%4,%%g1\n"                                         \
1226 "       divscc  %%g1,%4,%%g1\n"                                         \
1227 "       divscc  %%g1,%4,%%g1\n"                                         \
1228 "       divscc  %%g1,%4,%%g1\n"                                         \
1229 "       divscc  %%g1,%4,%%g1\n"                                         \
1230 "       divscc  %%g1,%4,%%g1\n"                                         \
1231 "       divscc  %%g1,%4,%%g1\n"                                         \
1232 "       divscc  %%g1,%4,%%g1\n"                                         \
1233 "       divscc  %%g1,%4,%%g1\n"                                         \
1234 "       divscc  %%g1,%4,%%g1\n"                                         \
1235 "       divscc  %%g1,%4,%%g1\n"                                         \
1236 "       divscc  %%g1,%4,%%g1\n"                                         \
1237 "       divscc  %%g1,%4,%%g1\n"                                         \
1238 "       divscc  %%g1,%4,%%g1\n"                                         \
1239 "       divscc  %%g1,%4,%%g1\n"                                         \
1240 "       divscc  %%g1,%4,%%g1\n"                                         \
1241 "       divscc  %%g1,%4,%%g1\n"                                         \
1242 "       divscc  %%g1,%4,%%g1\n"                                         \
1243 "       divscc  %%g1,%4,%%g1\n"                                         \
1244 "       divscc  %%g1,%4,%%g1\n"                                         \
1245 "       divscc  %%g1,%4,%%g1\n"                                         \
1246 "       divscc  %%g1,%4,%%g1\n"                                         \
1247 "       divscc  %%g1,%4,%0\n"                                           \
1248 "       rd      %%y,%1\n"                                               \
1249 "       bl,a 1f\n"                                                      \
1250 "       add     %1,%4,%1\n"                                             \
1251 "1:     ! End of inline udiv_qrnnd"                                     \
1252            : "=r" ((USItype) (q)),                                      \
1253              "=r" ((USItype) (r))                                       \
1254            : "r" ((USItype) (n1)),                                      \
1255              "r" ((USItype) (n0)),                                      \
1256              "rI" ((USItype) (d))                                       \
1257            : "g1" __AND_CLOBBER_CC)
1258 #define UDIV_TIME 37
/* count_leading_zeros (count, x) for __sparclite__: a single
   "scan %1,1,%0" instruction with x as the source operand and count as
   the destination.  */
1259 #define count_leading_zeros(count, x) \
1260   do {                                                                  \
1261   __asm__ ("scan %1,1,%0"                                               \
1262            : "=r" ((USItype) (count))                                   \
1263            : "r" ((USItype) (x)));                                      \
1264   } while (0)
1265 /* Early sparclites return 63 for an argument of 0, but they warn that future
1266    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1267    undefined.  */
1268 #else
1269 /* SPARC without integer multiplication and divide instructions.
1270    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (w1, w0, u, v) for SPARC without a multiply instruction.
   u is written to %y, and %o5 is set to u masked with the sign of v
   (v arithmetically shifted right by 31).  %g1 is cleared together with
   the condition codes, then 32 "mulscc" steps with v are issued followed
   by one final "mulscc" with 0.  w1 is %g1 plus the %o5 correction term
   and w0 is read back from %y.  %g1, %o5 and the condition codes are
   declared clobbered.  The instructions marked "Don't move this insn"
   must keep their position after the write to %y.
   NOTE(review): presumably mulscc treats its operands as signed, which is
   why the %o5 term is added to obtain the unsigned high word -- confirm
   against the SPARC manual.  */
1271 #define umul_ppmm(w1, w0, u, v) \
1272   __asm__ ("! Inlined umul_ppmm\n"                                      \
1273 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1274 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1275 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1276 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1277 "       mulscc  %%g1,%3,%%g1\n"                                         \
1278 "       mulscc  %%g1,%3,%%g1\n"                                         \
1279 "       mulscc  %%g1,%3,%%g1\n"                                         \
1280 "       mulscc  %%g1,%3,%%g1\n"                                         \
1281 "       mulscc  %%g1,%3,%%g1\n"                                         \
1282 "       mulscc  %%g1,%3,%%g1\n"                                         \
1283 "       mulscc  %%g1,%3,%%g1\n"                                         \
1284 "       mulscc  %%g1,%3,%%g1\n"                                         \
1285 "       mulscc  %%g1,%3,%%g1\n"                                         \
1286 "       mulscc  %%g1,%3,%%g1\n"                                         \
1287 "       mulscc  %%g1,%3,%%g1\n"                                         \
1288 "       mulscc  %%g1,%3,%%g1\n"                                         \
1289 "       mulscc  %%g1,%3,%%g1\n"                                         \
1290 "       mulscc  %%g1,%3,%%g1\n"                                         \
1291 "       mulscc  %%g1,%3,%%g1\n"                                         \
1292 "       mulscc  %%g1,%3,%%g1\n"                                         \
1293 "       mulscc  %%g1,%3,%%g1\n"                                         \
1294 "       mulscc  %%g1,%3,%%g1\n"                                         \
1295 "       mulscc  %%g1,%3,%%g1\n"                                         \
1296 "       mulscc  %%g1,%3,%%g1\n"                                         \
1297 "       mulscc  %%g1,%3,%%g1\n"                                         \
1298 "       mulscc  %%g1,%3,%%g1\n"                                         \
1299 "       mulscc  %%g1,%3,%%g1\n"                                         \
1300 "       mulscc  %%g1,%3,%%g1\n"                                         \
1301 "       mulscc  %%g1,%3,%%g1\n"                                         \
1302 "       mulscc  %%g1,%3,%%g1\n"                                         \
1303 "       mulscc  %%g1,%3,%%g1\n"                                         \
1304 "       mulscc  %%g1,%3,%%g1\n"                                         \
1305 "       mulscc  %%g1,%3,%%g1\n"                                         \
1306 "       mulscc  %%g1,%3,%%g1\n"                                         \
1307 "       mulscc  %%g1,%3,%%g1\n"                                         \
1308 "       mulscc  %%g1,%3,%%g1\n"                                         \
1309 "       mulscc  %%g1,0,%%g1\n"                                          \
1310 "       add     %%g1,%%o5,%0\n"                                         \
1311 "       rd      %%y,%1"                                                 \
1312            : "=r" ((USItype) (w1)),                                     \
1313              "=r" ((USItype) (w0))                                      \
1314            : "%rI" ((USItype) (u)),                                     \
1315              "r" ((USItype) (v))                                                \
1316            : "g1", "o5" __AND_CLOBBER_CC)
1317 #define UMUL_TIME 39            /* 39 instructions */
1318 /* It's quite necessary to add this much assembler for the sparc.
1319    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (__q, __r, __n1, __n0, __d) for SPARC without a divide
   instruction: a hand-written shift-and-subtract loop.  __n0 starts in
   the register of __q and __n1 in the register of __r (matching
   constraints "0" and "1"); %g1 is loaded with 32 and decremented as the
   loop counter.  Each pass shifts a bit into __q with addxcc/addcc and
   conditionally subtracts __d from __r; the final "xnor %0,0,%0"
   complements __q.  Instructions indented by one extra space sit directly
   after a branch.  %g1 and the condition codes are declared clobbered.
   NOTE(review): the extra-indented instructions are presumably branch
   delay slots; the control flow depends on exact instruction order, so
   do not reorder.  */
1320 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1321   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1322 "       mov     32,%%g1\n"                                              \
1323 "       subcc   %1,%2,%%g0\n"                                           \
1324 "1:     bcs     5f\n"                                                   \
1325 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1326 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1327 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1328 "       subcc   %%g1,1,%%g1\n"                                          \
1329 "2:     bne     1b\n"                                                   \
1330 "        subcc  %1,%2,%%g0\n"                                           \
1331 "       bcs     3f\n"                                                   \
1332 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1333 "       b       3f\n"                                                   \
1334 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1335 "4:     sub     %1,%2,%1\n"                                             \
1336 "5:     addxcc  %1,%1,%1\n"                                             \
1337 "       bcc     2b\n"                                                   \
1338 "        subcc  %%g1,1,%%g1\n"                                          \
1339 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1340 "       bne     4b\n"                                                   \
1341 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1342 "       sub     %1,%2,%1\n"                                             \
1343 "3:     xnor    %0,0,%0\n"                                              \
1344 "       ! End of inline udiv_qrnnd"                                     \
1345            : "=&r" ((USItype) (__q)),                                   \
1346              "=&r" ((USItype) (__r))                                    \
1347            : "r" ((USItype) (__d)),                                     \
1348              "1" ((USItype) (__n1)),                                    \
1349              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1350 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1351 #endif /* __sparclite__ */
1352 #endif /* __sparc_v8__ */
1353 #endif /* __sparc_v9__ */
1354 #endif /* sparc32 */
1355
1356 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1357     && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for 64-bit SPARC: two-word
   addition.  "addcc" adds the low words into sl and sets the condition
   codes, a plain "add" sums the high words into sh, "movcs %xcc" sets the
   local __carry (initialized to 0) to 1 when the carry condition holds,
   and the last "add" folds __carry into sh.  sl is early-clobber and
   __carry is a read-write operand ("+r").  The "%" modifier marks each
   (a, b) pair as commutative.  */
1358 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1359   do {                                                                  \
1360     UDItype __carry = 0;                                                \
1361     __asm__ ("addcc\t%r5,%6,%1\n\t"                                     \
1362              "add\t%r3,%4,%0\n\t"                                       \
1363              "movcs\t%%xcc, 1, %2\n\t"                                  \
1364              "add\t%0, %2, %0"                                          \
1365              : "=r" ((UDItype)(sh)),                                    \
1366                "=&r" ((UDItype)(sl)),                                   \
1367                "+r" (__carry)                                           \
1368              : "%rJ" ((UDItype)(ah)),                                   \
1369                "rI" ((UDItype)(bh)),                                    \
1370                "%rJ" ((UDItype)(al)),                                   \
1371                "rI" ((UDItype)(bl))                                     \
1372              __CLOBBER_CC);                                             \
1373   } while (0)
1374
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for 64-bit SPARC: two-word
   subtraction (sh, sl) = (ah, al) - (bh, bl).  "subcc" computes the low
   word into sl and sets the condition codes, a plain "sub" computes
   ah - bh into sh, "movcs %xcc" sets the local __carry (initialized to 0)
   to 1 when the low subtraction borrowed, and the last "sub" removes
   __carry from sh.  sl is early-clobber and __carry is a read-write
   operand ("+r").

   The minuend operands ah and al deliberately carry no "%" (commutative)
   constraint modifier: subtraction is not commutative, so the compiler
   must never be allowed to exchange ah with bh or al with bl while
   satisfying the operand constraints.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
1391
/* umul_ppmm (wh, wl, u, v) for 64-bit SPARC: full two-word product of u
   and v, built from four "mulx" multiplications combined with shifts by
   32 and adds.  tmp1..tmp4 are scratch registers; they and wl are
   early-clobber because they are written while u and v are still live.
   A carry term is materialized in %2 with sethi/add and cleared again by
   "movcc %xcc" on one condition before being added into wh.  The asm is
   marked __volatile__.
   NOTE(review): "srl x,0" and "srlx x,32" presumably extract the low and
   high 32-bit halves of each operand -- confirm against the SPARC V9
   manual.  The result depends on exact instruction order.  */
1392 #define umul_ppmm(wh, wl, u, v)                                         \
1393   do {                                                                  \
1394           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1395           __asm__ __volatile__ (                                        \
1396                    "srl %7,0,%3\n\t"                                    \
1397                    "mulx %3,%6,%1\n\t"                                  \
1398                    "srlx %6,32,%2\n\t"                                  \
1399                    "mulx %2,%3,%4\n\t"                                  \
1400                    "sllx %4,32,%5\n\t"                                  \
1401                    "srl %6,0,%3\n\t"                                    \
1402                    "sub %1,%5,%5\n\t"                                   \
1403                    "srlx %5,32,%5\n\t"                                  \
1404                    "addcc %4,%5,%4\n\t"                                 \
1405                    "srlx %7,32,%5\n\t"                                  \
1406                    "mulx %3,%5,%3\n\t"                                  \
1407                    "mulx %2,%5,%5\n\t"                                  \
1408                    "sethi %%hi(0x80000000),%2\n\t"                      \
1409                    "addcc %4,%3,%4\n\t"                                 \
1410                    "srlx %4,32,%4\n\t"                                  \
1411                    "add %2,%2,%2\n\t"                                   \
1412                    "movcc %%xcc,%%g0,%2\n\t"                            \
1413                    "addcc %5,%4,%5\n\t"                                 \
1414                    "sllx %3,32,%3\n\t"                                  \
1415                    "add %1,%3,%1\n\t"                                   \
1416                    "add %5,%2,%0"                                       \
1417            : "=r" ((UDItype)(wh)),                                      \
1418              "=&r" ((UDItype)(wl)),                                     \
1419              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1420            : "r" ((UDItype)(u)),                                        \
1421              "r" ((UDItype)(v))                                         \
1422            __CLOBBER_CC);                                               \
1423   } while (0)
1424 #define UMUL_TIME 96
1425 #define UDIV_TIME 230
1426 #endif /* sparc64 */
1427
1428 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for VAX: two-word addition done in
   place.  ah and al are tied to the output operands (matching constraints
   "0" and "1"); "addl2" adds bl into the low word and "adwc" then adds bh
   into the high word.  sl is early-clobber, and "%" marks each (a, b)
   pair as commutative.  */
1429 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1430   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1431            : "=g" ((USItype) (sh)),                                     \
1432              "=&g" ((USItype) (sl))                                     \
1433            : "%0" ((USItype) (ah)),                                     \
1434              "g" ((USItype) (bh)),                                      \
1435              "%1" ((USItype) (al)),                                     \
1436              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for VAX: two-word subtraction done
   in place.  ah and al are tied to the output operands (matching
   constraints "0" and "1"); "subl2" subtracts bl from the low word and
   "sbwc" then subtracts bh from the high word.  sl is early-clobber.  */
1437 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1438   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1439            : "=g" ((USItype) (sh)),                                     \
1440              "=&g" ((USItype) (sl))                                     \
1441            : "0" ((USItype) (ah)),                                      \
1442              "g" ((USItype) (bh)),                                      \
1443              "1" ((USItype) (al)),                                      \
1444              "g" ((USItype) (bl)))
/* umul_ppmm (xh, xl, m0, m1) for VAX: m0 and m1 are copied into locals,
   "emul" multiplies them (with a literal 0 as its third operand) into a
   UDItype that is overlaid, through a union, with a {__l, __h} pair of
   USItype words.  xh and xl are read from that pair, and xh is then
   adjusted by adding __m1 when the top bit of __m0 is set and __m0 when
   the top bit of __m1 is set.
   NOTE(review): the adjustment presumably converts emul's signed product
   into the unsigned high word -- confirm against the VAX architecture
   reference.  */
1445 #define umul_ppmm(xh, xl, m0, m1) \
1446   do {                                                                  \
1447     union {                                                             \
1448         UDItype __ll;                                                   \
1449         struct {USItype __l, __h;} __i;                                 \
1450       } __xx;                                                           \
1451     USItype __m0 = (m0), __m1 = (m1);                                   \
1452     __asm__ ("emul %1,%2,$0,%0"                                         \
1453              : "=r" (__xx.__ll)                                         \
1454              : "g" (__m0),                                              \
1455                "g" (__m1));                                             \
1456     (xh) = __xx.__i.__h;                                                \
1457     (xl) = __xx.__i.__l;                                                \
1458     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1459              + (((SItype) __m1 >> 31) & __m0));                         \
1460   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d) for VAX: n1 and n0 are stored as the __h
   and __l halves of a signed two-word union, and "ediv" divides that
   DItype value by d, writing the quotient to q and the remainder to r.
   n1 and n0 are expanded without parentheses.  */
1461 #define sdiv_qrnnd(q, r, n1, n0, d) \
1462   do {                                                                  \
1463     union {DItype __ll;                                                 \
1464            struct {SItype __l, __h;} __i;                               \
1465           } __xx;                                                       \
1466     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1467     __asm__ ("ediv %3,%2,%0,%1"                                         \
1468              : "=g" (q), "=g" (r)                                       \
1469              : "g" (__xx.__ll), "g" (d));                               \
1470   } while (0)
1471 #endif /* __vax__ */
1472
1473 #ifdef _TMS320C6X
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for TMS320C6X: "addu .l1" adds al
   and bl into the UDItype temporary __ll.  sl is the low 32 bits of
   __ll; sh is the bits of __ll above bit 31 plus ah plus bh, all computed
   in C.
   NOTE(review): presumably addu produces a result wider than 32 bits so
   that the carry lands in the upper part of __ll -- confirm against the
   C6000 instruction set reference.  */
1474 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1475   do                                                                    \
1476     {                                                                   \
1477       UDItype __ll;                                                     \
1478       __asm__ ("addu .l1 %1, %2, %0"                                    \
1479                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1480       (sl) = (USItype)__ll;                                             \
1481       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1482     }                                                                   \
1483   while (0)
1484
1485 #ifdef _TMS320C6400_PLUS
/* __umulsidi3 (u, v): widening unsigned multiply.  Each argument is
   converted to USItype; the first is then widened to UDItype so that the
   multiplication is carried out in UDItype.  The arguments are
   parenthesized, as in the umul_ppmm definition that follows, so that a
   compound expression such as a + b is cast and multiplied as a whole.  */
#define __umulsidi3(u, v) ((UDItype) (USItype) (u) * (USItype) (v))
/* umul_ppmm (w1, w0, u, v): multiply u by v as unsigned 32-bit values,
   producing the 64-bit product in C.  The upper 32 bits of the product
   are stored in w1 first, then the lower 32 bits in w0.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do                                                                    \
    {                                                                   \
      UDItype __product;                                                \
                                                                        \
      __product = (UDItype) (USItype) (u) * (USItype) (v);              \
      (w1) = (USItype) (__product >> 32);                               \
      (w0) = (USItype) (__product);                                     \
    }                                                                   \
  while (0)
1493 #endif  /* _TMS320C6400_PLUS */
1494
/* count_leading_zeros (count, x) assigns __builtin_clz (x) to count.
   count_trailing_zeros (count, x) assigns __builtin_ctz (x) to count and
   is only provided when _TMS320C6400 is defined.  Both expand to a
   single assignment expression.  */
1495 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1496 #ifdef _TMS320C6400
1497 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1498 #endif
1499 #define UMUL_TIME 4
1500 #define UDIV_TIME 40
1501 #endif /* _TMS320C6X */
1502
1503 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1504 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1505    to expand builtin functions depending on what configuration features
1506    are available.  This avoids library calls when the operation can be
1507    performed in-line.  */
/* umul_ppmm (w1, w0, u, v): obtain the product of u and v from
   __builtin_umulsidi3 and split it through a DWunion: the `high' member
   is assigned to w1 first, then the `low' member to w0.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do                                                                    \
    {                                                                   \
      DWunion __product;                                                \
                                                                        \
      __product.ll = __builtin_umulsidi3 (u, v);                        \
      w1 = __product.s.high;                                            \
      w0 = __product.s.low;                                             \
    }                                                                   \
  while (0)
/* Thin wrappers around compiler builtins: __umulsidi3 forwards to
   __builtin_umulsidi3, while count_leading_zeros and count_trailing_zeros
   assign __builtin_clz (X) and __builtin_ctz (X) to COUNT.  */
1515 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1516 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1517 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1518 #endif /* __xtensa__ */
1519
1520 #if defined xstormy16
1521 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros (count, x): count the leading zero bits of the
   W_TYPE_SIZE-bit value x, 16 bits at a time, starting with the most
   significant chunk.  Each chunk is passed to __clzhi2 and its result is
   added to count; scanning stops at the first chunk whose result is not
   16.  When every chunk yields 16 the loop runs to completion, so count
   ends up as W_TYPE_SIZE.

   The scratch variables use reserved-style names so that a caller may
   pass variables called `size' or `c' as count or x without them being
   captured by the macro's own locals.  x is expanded once per chunk.

   NOTE(review): no prototype for __clzhi2 is visible in this section;
   the extern declaration preceding this macro names
   __stormy16_count_leading_zeros instead -- confirm which is intended.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_chunk;                                          \
                                                                        \
          __clz_chunk = __clzhi2 ((x) >> (__clz_size - 16));            \
          (count) += __clz_chunk;                                       \
          if (__clz_chunk != 16)                                        \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
1539 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1540 #endif
1541
1542 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (sh, sl, ah, al, bh, bl) for Z8000 with 16-bit words:
   two-word addition done in place.  ah and al are tied to the output
   operands (matching constraints "0" and "1"); "add" adds bl into the low
   word and "adc" then adds bh into the high word.  sl is early-clobber,
   and "%" marks each (a, b) pair as commutative.  */
1543 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1544   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1545            : "=r" ((unsigned int)(sh)),                                 \
1546              "=&r" ((unsigned int)(sl))                                 \
1547            : "%0" ((unsigned int)(ah)),                                 \
1548              "r" ((unsigned int)(bh)),                                  \
1549              "%1" ((unsigned int)(al)),                                 \
1550              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl) for Z8000 with 16-bit words:
   two-word subtraction done in place.  ah and al are tied to the output
   operands (matching constraints "0" and "1"); "sub" subtracts bl from
   the low word and "sbc" then subtracts bh from the high word.  sl is
   early-clobber.  */
1551 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1552   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1553            : "=r" ((unsigned int)(sh)),                                 \
1554              "=&r" ((unsigned int)(sl))                                 \
1555            : "0" ((unsigned int)(ah)),                                  \
1556              "r" ((unsigned int)(bh)),                                  \
1557              "1" ((unsigned int)(al)),                                  \
1558              "rQR" ((unsigned int)(bl)))
/* umul_ppmm (xh, xl, m0, m1) for Z8000 with 16-bit words: m0 and m1 are
   copied into locals, __m0 is tied to the __l output (matching constraint
   "1"), and "mult" leaves its result in the {__h, __l} pair of a local
   union.  xh and xl are read from that pair, and xh is then adjusted by
   adding __m1 when bit 15 of __m0 is set and __m0 when bit 15 of __m1 is
   set.
   NOTE(review): the adjustment presumably converts a signed product from
   "mult" into the unsigned high word -- confirm against the Z8000
   instruction reference.  */
1559 #define umul_ppmm(xh, xl, m0, m1) \
1560   do {                                                                  \
1561     union {long int __ll;                                               \
1562            struct {unsigned int __h, __l;} __i;                         \
1563           } __xx;                                                       \
1564     unsigned int __m0 = (m0), __m1 = (m1);                              \
1565     __asm__ ("mult      %S0,%H3"                                        \
1566              : "=r" (__xx.__i.__h),                                     \
1567                "=r" (__xx.__i.__l)                                      \
1568              : "%1" (__m0),                                             \
1569                "rQR" (__m1));                                           \
1570     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1571     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1572              + (((signed int) __m1 >> 15) & __m0));                     \
1573   } while (0)
1574 #endif /* __z8000__ */
1575
1576 #endif /* __GNUC__ */
1577
/* If this machine has no inline assembler, use C macros.  */

#if !defined (add_ssaaaa)
/* add_ssaaaa (sh, sl, ah, al, bh, bl) -- two-word addition in plain C.
   SL receives the low word of AL + BL; SH receives AH + BH plus the carry
   out of the low-word addition.  A carry out of the high word is lost.
   AL is needed twice (for the sum and for the carry test), so it is copied
   to a local first; an argument with side effects is evaluated only once.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __add_al = (al);                                             \
    UWtype __add_lo = __add_al + (bl);                                  \
    /* An unsigned sum wrapped iff it is smaller than an addend.  */    \
    (sh) = (ah) + (bh) + (__add_lo < __add_al);                         \
    (sl) = __add_lo;                                                    \
  } while (0)
#endif
1589
#if !defined (sub_ddmmss)
/* sub_ddmmss (sh, sl, ah, al, bh, bl) -- two-word subtraction in plain C.
   SL receives the low word of AL - BL; SH receives AH - BH minus the borrow
   out of the low-word subtraction.  A borrow out of the high word is lost.
   AL is needed twice (for the difference and for the borrow test), so it is
   copied to a local first; an argument with side effects is evaluated only
   once.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_al = (al);                                             \
    UWtype __sub_lo = __sub_al - (bl);                                  \
    /* An unsigned difference wrapped iff it exceeds the minuend.  */   \
    (sh) = (ah) - (bh) - (__sub_lo > __sub_al);                         \
    (sl) = __sub_lo;                                                    \
  } while (0)
#endif
1599
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.

   umul_ppmm (w1, w0, u, v): W0 is taken straight from smul_ppmm; W1 is
   smul_ppmm's high word plus V when U has its top bit set, plus U when V
   has its top bit set.  Both operands are copied to locals, so each is
   evaluated once.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __w1;                                                        \
    UWtype __xm0 = (u), __xm1 = (v);                                    \
    smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
    /* -(x >> (W_TYPE_SIZE - 1)) is all-ones iff x's top bit is set,    \
       so each `&' below selects either the other operand or zero.  */  \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
  } while (0)
#endif
1612
/* If we still don't have umul_ppmm, define it using plain C.

   umul_ppmm (w1, w0, u, v): (w1,w0) = u * v, built from the four half-word
   partial products.  U and V are copied to locals before being split, so an
   argument with side effects is evaluated only once even though both its
   low and high halves are needed.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
    UWtype __umul_u = (u), __umul_v = (v);                              \
                                                                        \
    __ul = __ll_lowpart (__umul_u);                                     \
    __uh = __ll_highpart (__umul_u);                                    \
    __vl = __ll_lowpart (__umul_v);                                     \
    __vh = __ll_highpart (__umul_v);                                    \
                                                                        \
    __x0 = (UWtype) __ul * __vl;        /* low  * low  */               \
    __x1 = (UWtype) __ul * __vh;        /* low  * high */               \
    __x2 = (UWtype) __uh * __vl;        /* high * low  */               \
    __x3 = (UWtype) __uh * __vh;        /* high * high */               \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
1639
#if !defined (__umulsidi3)
/* __umulsidi3 (u, v): multiply two words with umul_ppmm and yield the
   result as a single value, by writing the high and low words into a
   DWunion and reading back its `ll' member.  This is a statement
   expression, so it can be used wherever an expression is expected.  */
#define __umulsidi3(u, v) \
  ({DWunion __w;                                                        \
    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
    __w.ll; })
#endif
1646
/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d): divide the two-word value (n1,n0) by D,
   storing the quotient in Q and the remainder in R.  The quotient is
   produced as two half-word digits, each estimated by dividing by the high
   half of D and then corrected at most twice.  When this macro serves as
   udiv_qrnnd, UDIV_NEEDS_NORMALIZATION is set to 1, i.e. D is expected to
   have its most significant bit set.  NOTE(review): callers are presumably
   also required to pass N1 < D so the quotient fits in one word; that is
   not checked here.

   The inputs are copied to locals first: they are referenced several times
   below, and the copies ensure an argument with side effects is evaluated
   only once.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    UWtype __udiv_d = (d), __udiv_n1 = (n1), __udiv_n0 = (n0);          \
                                                                        \
    __d1 = __ll_highpart (__udiv_d);                                    \
    __d0 = __ll_lowpart (__udiv_d);                                     \
                                                                        \
    /* High quotient digit.  */                                         \
    __r1 = __udiv_n1 % __d1;                                            \
    __q1 = __udiv_n1 / __d1;                                            \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0);                   \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __udiv_d;                                       \
        /* i.e. we didn't get carry when adding to __r1 */              \
        if (__r1 >= __udiv_d)                                           \
          if (__r1 < __m)                                               \
            __q1--, __r1 += __udiv_d;                                   \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low quotient digit, same scheme on the partial remainder.  */    \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0);                    \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __udiv_d;                                       \
        if (__r0 >= __udiv_d)                                           \
          if (__r0 < __m)                                               \
            __q0--, __r0 += __udiv_d;                                   \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1684
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   udiv_qrnnd (q, r, nh, nl, d): Q receives the value returned by
   __udiv_w_sdiv and R the remainder it stores through its first argument.
   The remainder temporary is a UWtype, matching the word type used by
   every other macro in this file, so the pointer handed to the helper has
   the right type and size whatever W_TYPE_SIZE is.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1695
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is set to 1 alongside it, telling users of
   udiv_qrnnd that this fallback is in effect.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1701
#if !defined (count_leading_zeros)
/* count_leading_zeros (count, x) -- table-driven C fallback.
   First finds a shift __a such that X >> __a is small enough to index
   __clz_tab, then sets COUNT to W_TYPE_SIZE - (__clz_tab[X >> __a] + __a).
   For words of at most 32 bits __a is one of 0, __BITS4, 2*__BITS4 or
   3*__BITS4, chosen by comparing X against powers of two; for wider words
   __a steps down from W_TYPE_SIZE - 8 in units of 8 until a nonzero byte
   is found (or __a reaches 0).  X is copied to a local and so is
   evaluated once.  COUNT_LEADING_ZEROS_0 is the value of COUNT this
   implementation is declared to give for X == 0.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __xr = (x);                                                  \
    UWtype __a;                                                         \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        __a = __xr < ((UWtype)1<<2*__BITS4)                             \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
          : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
          if (((__xr >> __a) & 0xff) != 0)                              \
            break;                                                      \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1725
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.

   X & -X isolates the lowest set bit of X; its leading-zero count C then
   gives the bit's index as W_TYPE_SIZE - 1 - C.  X must be nonzero: for
   X == 0 no bit is isolated and the result is not a valid bit index.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    UWtype __ctz_c;                                                     \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
    (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
  } while (0)
#endif
1737
/* Default: unless something earlier defined it, udiv_qrnnd does not need
   normalized operands.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif