packaging: Make glibc-devel symlinks consistent
[platform/upstream/glibc.git] / stdlib / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991-2023 Free Software Foundation, Inc.
3
4    This file is part of the GNU C Library.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    In addition to the permissions in the GNU Lesser General Public
12    License, the Free Software Foundation gives you unlimited
13    permission to link the compiled version of this file into
14    combinations with other programs, and to distribute those
15    combinations without any restriction coming from the use of this
16    file.  (The Lesser General Public License restrictions do apply in
17    other respects; for example, they cover modification of the file,
18    and distribution when not linked into a combine executable.)
19
20    The GNU C Library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with the GNU C Library; if not, see
27    <https://www.gnu.org/licenses/>.  */
28
29 /* You have to define the following before including this file:
30
31    UWtype -- An unsigned type, default type for operations (typically a "word")
32    UHWtype -- An unsigned type, at least half the size of UWtype.
33    UDWtype -- An unsigned type, at least twice as large as UWtype
34    W_TYPE_SIZE -- size in bits of UWtype
35
36    UQItype -- Unsigned 8 bit type.
37    SItype, USItype -- Signed and unsigned 32 bit types.
38    DItype, UDItype -- Signed and unsigned 64 bit types.
39
40    On a 32 bit machine UWtype should typically be USItype;
41    on a 64 bit machine, UWtype should typically be UDItype.  */
42
/* Half-word helpers.  __BITS4 is a quarter of the word width in bits.
   __ll_B is 1 shifted left by half the word width, i.e. the base of a
   half-word digit.  __ll_lowpart and __ll_highpart convert T to UWtype
   and return its low and high half respectively.  W_TYPE_SIZE and UWtype
   are only needed where these macros are expanded, not where they are
   defined.  */
43 #define __BITS4 (W_TYPE_SIZE / 4)
44 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
45 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
46 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
47
/* Fallback configuration, used only when the includer has not defined
   W_TYPE_SIZE: a 32-bit word of type USItype, USItype again as the
   half-word type, and UDItype as the double-word type.  */
48 #ifndef W_TYPE_SIZE
49 #define W_TYPE_SIZE     32
50 #define UWtype          USItype
51 #define UHWtype         USItype
52 #define UDWtype         UDItype
53 #endif
54
/* attribute_hidden expands to nothing unless the includer has already
   defined it.  __clz_tab is a 256-entry byte table defined elsewhere;
   in the part of the file visible here it is indexed by the alpha
   count_leading_zeros fallback.  */
55 /* Used in glibc only.  */
56 #ifndef attribute_hidden
57 #define attribute_hidden
58 #endif
59
60 extern const UQItype __clz_tab[256] attribute_hidden;
61
62 /* Define auxiliary asm macros.
63
64    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
66    word product in HIGH_PROD and LOW_PROD.
67
68    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69    UDWtype product.  This is just a variant of umul_ppmm.
70
71    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72    denominator) divides a UDWtype, composed by the UWtype integers
73    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
75    than DENOMINATOR for correct operation.  If, in addition, the most
76    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
77    UDIV_NEEDS_NORMALIZATION is defined to 1.
78
79    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
80    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
81    is rounded towards 0.
82
83    5) count_leading_zeros(count, x) counts the number of zero-bits from the
84    msb to the first nonzero bit in the UWtype X.  This is the number of
85    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
86    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
87
88    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89    from the least significant end.
90
91    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
92    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
93    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
95    (i.e. carry out) is not stored anywhere, and is lost.
96
97    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
100    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
101    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
102    and is lost.
103
104    If any of these macros are left undefined for a particular CPU,
105    C macros are used.  */
106
107 /* The CPUs come in alphabetical order below.
108
109    Please add support for more CPUs here, or improve the current support
110    for the CPUs below!
111    (E.g. WE32100, IBM360.)  */
112
113 #if defined (__GNUC__) && !defined (NO_ASM)
114
/* __CLOBBER_CC opens a clobber list containing only "cc" (it supplies
   the leading colon); __AND_CLOBBER_CC appends "cc" to a clobber list
   that already has entries (it supplies the leading comma).  Both expand
   to nothing when __GNUC__ is below 2.  */
115 /* We sometimes need to clobber "cc" with gcc2, but that would not be
116    understood by gcc1.  Use cpp to avoid major code duplication.  */
117 #if __GNUC__ < 2
118 #define __CLOBBER_CC
119 #define __AND_CLOBBER_CC
120 #else /* __GNUC__ >= 2 */
121 #define __CLOBBER_CC : "cc"
122 #define __AND_CLOBBER_CC , "cc"
123 #endif /* __GNUC__ < 2 */
124
/* AArch64: count_leading_zeros and count_trailing_zeros are mapped to
   the GCC builtins matching the word size (int variants for 32 bits,
   long long variants for 64 bits).  COUNT_LEADING_ZEROS_0 is defined to
   the word width.  Nothing is defined for other word sizes.  */
125 #if defined (__aarch64__)
126
127 #if W_TYPE_SIZE == 32
128 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
129 #define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctz (X))
130 #define COUNT_LEADING_ZEROS_0 32
131 #endif /* W_TYPE_SIZE == 32 */
132
133 #if W_TYPE_SIZE == 64
134 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clzll (X))
135 #define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctzll (X))
136 #define COUNT_LEADING_ZEROS_0 64
137 #endif /* W_TYPE_SIZE == 64 */
138
139 #endif /* __aarch64__ */
140
141 #if defined (__alpha) && W_TYPE_SIZE == 64
/* Alpha umul_ppmm: each multiplier is evaluated once into a UDItype
   local.  PH receives the result of __builtin_alpha_umulh and PL the
   ordinary (truncating) UDItype product.  The macro is not defined when
   compiling as C++ with __GNUC__ below 5.  */
142 /* There is a bug in g++ before version 5 that
143    errors on __builtin_alpha_umulh.  */
144 #if !defined(__cplusplus) || __GNUC__ >= 5
145 #define umul_ppmm(ph, pl, m0, m1) \
146   do {                                                                  \
147     UDItype __m0 = (m0), __m1 = (m1);                                   \
148     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
149     (pl) = __m0 * __m1;                                                 \
150   } while (0)
151 #define UMUL_TIME 46
152 #endif /* !c++ */
/* Alpha udiv_qrnnd: delegates to the out-of-line function __udiv_qrnnd,
   whose return value is stored in Q and which writes through its first
   (pointer) argument; that value is then stored in R.  Not provided when
   LONGLONG_STANDALONE is defined.  */
153 #ifndef LONGLONG_STANDALONE
154 #define udiv_qrnnd(q, r, n1, n0, d) \
155   do { UDItype __r;                                                     \
156     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
157     (r) = __r;                                                          \
158   } while (0)
159 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
160 #define UDIV_TIME 220
161 #endif /* LONGLONG_STANDALONE */
/* Alpha bit counting.  With __alpha_cix__ the GCC long builtins are used
   and COUNT_LEADING_ZEROS_0 is 64.

   Without it, both macros work a byte at a time.  X is evaluated once
   into __xr.  __builtin_alpha_cmpbge (0, __xr) yields a per-byte mask
   (presumably one bit per zero byte of X -- confirm against the Alpha
   builtin documentation).  count_leading_zeros feeds the inverted mask
   through __clz_tab to pick a byte index __a, extracts that byte with
   __builtin_alpha_extbl, and applies __clz_tab to it again.
   count_trailing_zeros isolates the lowest set bit of the inverted mask,
   converts that one-hot value to a byte index with the 0xCC/0xF0/0xAA
   tests, and repeats the same one-hot-to-index step on the extracted
   byte.  COUNT_LEADING_ZEROS_0 is not defined on this path.  */
162 #ifdef __alpha_cix__
163 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
164 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
165 #define COUNT_LEADING_ZEROS_0 64
166 #else
167 #define count_leading_zeros(COUNT,X) \
168   do {                                                                  \
169     UDItype __xr = (X), __t, __a;                                       \
170     __t = __builtin_alpha_cmpbge (0, __xr);                             \
171     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
172     __t = __builtin_alpha_extbl (__xr, __a);                            \
173     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
174   } while (0)
175 #define count_trailing_zeros(COUNT,X) \
176   do {                                                                  \
177     UDItype __xr = (X), __t, __a;                                       \
178     __t = __builtin_alpha_cmpbge (0, __xr);                             \
179     __t = ~__t & -~__t;                                                 \
180     __a = ((__t & 0xCC) != 0) * 2;                                      \
181     __a += ((__t & 0xF0) != 0) * 4;                                     \
182     __a += ((__t & 0xAA) != 0);                                         \
183     __t = __builtin_alpha_extbl (__xr, __a);                            \
184     __a <<= 3;                                                          \
185     __t &= -__t;                                                        \
186     __a += ((__t & 0xCC) != 0) * 2;                                     \
187     __a += ((__t & 0xF0) != 0) * 4;                                     \
188     __a += ((__t & 0xAA) != 0);                                         \
189     (COUNT) = __a;                                                      \
190   } while (0)
191 #endif /* __alpha_cix__ */
192 #endif /* __alpha */
193
194 #if defined (__arc__) && W_TYPE_SIZE == 32
/* ARC two-word add and subtract.  In both macros the first instruction
   combines the low words (operands 4 and 5) into SL and the second
   combines the high words (operands 2 and 3) into SH; "cc" is listed as
   clobbered.  SL is an earlyclobber output ("=&r") because the high-word
   inputs are still read after SL has been written.  The "%" on the
   add_ssaaaa inputs marks each pair as commutative; sub_ddmmss omits it
   because subtraction is not.  */
195 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
196   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
197            : "=r" ((USItype) (sh)),                                     \
198              "=&r" ((USItype) (sl))                                     \
199            : "%r" ((USItype) (ah)),                                     \
200              "rICal" ((USItype) (bh)),                                  \
201              "%r" ((USItype) (al)),                                     \
202              "rICal" ((USItype) (bl))                                   \
203            : "cc")
204 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
205   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
206            : "=r" ((USItype) (sh)),                                     \
207              "=&r" ((USItype) (sl))                                     \
208            : "r" ((USItype) (ah)),                                      \
209              "rICal" ((USItype) (bh)),                                  \
210              "r" ((USItype) (al)),                                      \
211              "rICal" ((USItype) (bl))                                   \
212            : "cc")
213
/* __umulsidi3(u, v): widening multiply.  Both arguments are converted to
   USItype and the multiplication is carried out in UDItype, giving the
   full double-word product.  Each argument is parenthesized so that a
   compound expression such as `a + b' is converted as a whole before the
   multiplication instead of being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* ARC count_leading_zeros, only when __ARC_NORM__ is defined.  The asm
   leaves a signed value in c_ and COUNT is set to c_ + 1.  The
   conditional "mov.mi %0,-1" overrides c_ with -1 (so COUNT becomes 0)
   when the flags set by norm.f satisfy "mi" -- presumably when the top
   bit of X is set; confirm against the ARC instruction reference.
   COUNT_LEADING_ZEROS_0 is 32.  */
215 #ifdef __ARC_NORM__
216 #define count_leading_zeros(count, x) \
217   do                                                                    \
218     {                                                                   \
219       SItype c_;                                                        \
220                                                                         \
221       __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
222       (count) = c_ + 1;                                                 \
223     }                                                                   \
224   while (0)
225 #define COUNT_LEADING_ZEROS_0 32
226 #endif /* __ARC_NORM__ */
227 #endif /* __arc__ */
228
229 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
230  && W_TYPE_SIZE == 32
/* ARM two-word add and subtract.  The first instruction combines the low
   words (operands 4 and 5) into SL, the second combines the high words
   (operands 2 and 3) into SH.  SL is an earlyclobber output because the
   high-word inputs are read after it is written.  The clobber list comes
   from __CLOBBER_CC, so "cc" is declared only when that macro is
   non-empty.  */
231 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
232   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
233            : "=r" ((USItype) (sh)),                                     \
234              "=&r" ((USItype) (sl))                                     \
235            : "%r" ((USItype) (ah)),                                     \
236              "rI" ((USItype) (bh)),                                     \
237              "%r" ((USItype) (al)),                                     \
238              "rI" ((USItype) (bl)) __CLOBBER_CC)
239 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
240   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
241            : "=r" ((USItype) (sh)),                                     \
242              "=&r" ((USItype) (sl))                                     \
243            : "r" ((USItype) (ah)),                                      \
244              "rI" ((USItype) (bh)),                                     \
245              "r" ((USItype) (al)),                                      \
246              "rI" ((USItype) (bl)) __CLOBBER_CC)
/* ARM umul_ppmm.  For __ARM_ARCH_2__, __ARM_ARCH_2A__ and __ARM_ARCH_3__
   the 32x32->64 product is assembled by hand: both inputs are split at
   bit 16, four "mul" partial products are formed, and the cross terms
   are folded in with carry propagation (the "addcs ... #65536" adds the
   carry of the cross-term sum at bit 16 of the high word).  __t0..__t2
   are scratch registers.  On every other ARM target the product is
   written as a plain UDItype multiplication of two USItype values and
   split into XL (low 32 bits) and XH (high 32 bits), leaving instruction
   selection to the compiler.  UDIV_TIME is set for both variants.  */
247 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
248      || defined(__ARM_ARCH_3__)
249 #  define umul_ppmm(xh, xl, a, b)                                       \
250   do {                                                                  \
251     register USItype __t0, __t1, __t2;                                  \
252     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
253            "    mov     %2, %5, lsr #16\n"                              \
254            "    mov     %0, %6, lsr #16\n"                              \
255            "    bic     %3, %5, %2, lsl #16\n"                          \
256            "    bic     %4, %6, %0, lsl #16\n"                          \
257            "    mul     %1, %3, %4\n"                                   \
258            "    mul     %4, %2, %4\n"                                   \
259            "    mul     %3, %0, %3\n"                                   \
260            "    mul     %0, %2, %0\n"                                   \
261            "    adds    %3, %4, %3\n"                                   \
262            "    addcs   %0, %0, #65536\n"                               \
263            "    adds    %1, %1, %3, lsl #16\n"                          \
264            "    adc     %0, %0, %3, lsr #16"                            \
265            : "=&r" ((USItype) (xh)),                                    \
266              "=r" ((USItype) (xl)),                                     \
267              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
268            : "r" ((USItype) (a)),                                       \
269              "r" ((USItype) (b)) __CLOBBER_CC );                        \
270   } while (0)
271 #  define UMUL_TIME 20
272 # else
273 #  define umul_ppmm(xh, xl, a, b)                                       \
274   do {                                                                  \
275     /* Generate umull, under compiler control.  */                      \
276     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
277     (xl) = (USItype)__t0;                                               \
278     (xh) = (USItype)(__t0 >> 32);                                       \
279   } while (0)
280 #  define UMUL_TIME 3
281 # endif
282 # define UDIV_TIME 100
283 #endif /* __arm__ */
284
/* ARM bit counting.  Unlike the arithmetic macros for ARM, this section
   is guarded by __arm__ alone: it has no Thumb or W_TYPE_SIZE test.
   COUNT_LEADING_ZEROS_0 is 32.  */
285 #if defined(__arm__)
286 /* Let gcc decide how best to implement count_leading_zeros.  */
287 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
288 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
289 #define COUNT_LEADING_ZEROS_0 32
290 #endif
291
/* AVR bit counting.  The builtin is chosen by word size: the int
   variants for 16 bits, the long variants for 32 bits and the long long
   variants for 64 bits.  COUNT_LEADING_ZEROS_0 equals the word width in
   each case.  */
292 #if defined (__AVR__)
293
294 #if W_TYPE_SIZE == 16
295 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
296 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
297 #define COUNT_LEADING_ZEROS_0 16
298 #endif /* W_TYPE_SIZE == 16 */
299
300 #if W_TYPE_SIZE == 32
301 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
302 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
303 #define COUNT_LEADING_ZEROS_0 32
304 #endif /* W_TYPE_SIZE == 32 */
305
306 #if W_TYPE_SIZE == 64
307 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
308 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
309 #define COUNT_LEADING_ZEROS_0 64
310 #endif /* W_TYPE_SIZE == 64 */
311
312 #endif /* defined (__CRIS__) */
313
314 #if defined (__CRIS__)
315
/* CRIS bit counting.  count_leading_zeros (with COUNT_LEADING_ZEROS_0 of
   32) is provided from architecture version 3 on, count_trailing_zeros
   only from version 8 on.  Neither test checks W_TYPE_SIZE.  */
316 #if __CRIS_arch_version >= 3
317 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
318 #define COUNT_LEADING_ZEROS_0 32
319 #endif /* __CRIS_arch_version >= 3 */
320
321 #if __CRIS_arch_version >= 8
322 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
323 #endif /* __CRIS_arch_version >= 8 */
324
/* CRIS __umulsidi3.  From architecture version 10 on the product is
   written inline as a UDItype multiplication of two values that are
   first converted to USItype.  Otherwise __umulsidi3 is an external
   function; defining the macro to its own name makes
   `defined (__umulsidi3)' true while calls still reach that function.  */
325 #if __CRIS_arch_version >= 10
326 #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
327 #else
328 #define __umulsidi3 __umulsidi3
329 extern UDItype __umulsidi3 (USItype, USItype);
330 #endif /* __CRIS_arch_version >= 10 */
331
/* CRIS umul_ppmm: forms the double-word product with __umulsidi3 and
   splits it, storing the low 32 bits in W0 and the high 32 bits in W1.
   U and V are each evaluated once.  */
332 #define umul_ppmm(w1, w0, u, v)         \
333   do {                                  \
334     UDItype __x = __umulsidi3 (u, v);   \
335     (w0) = (USItype) (__x);             \
336     (w1) = (USItype) (__x >> 32);       \
337   } while (0)
338
339 /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
340    DFmode ("double" intrinsics, avoiding two of the three insns handling
341    carry), but defining them as open-code C composing and doing the
342    operation in DImode (UDImode) shows that the DImode needs work:
343    register pressure from requiring neighboring registers and the
344    traffic to and from them come to dominate, in the 4.7 series.  */
345
346 #endif /* defined (__CRIS__) */
347
348 #if defined (__hppa) && W_TYPE_SIZE == 32
/* HPPA two-word add and subtract.  The destination is the last operand
   of each instruction: the first instruction combines the low words
   (operands 4 and 5) into SL (%1), the second combines the high words
   (operands 2 and 3) into SH (%0).  SL is an earlyclobber output.  No
   clobbers are declared.  */
349 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
350   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
351            : "=r" ((USItype) (sh)),                                     \
352              "=&r" ((USItype) (sl))                                     \
353            : "%rM" ((USItype) (ah)),                                    \
354              "rM" ((USItype) (bh)),                                     \
355              "%rM" ((USItype) (al)),                                    \
356              "rM" ((USItype) (bl)))
357 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
358   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
359            : "=r" ((USItype) (sh)),                                     \
360              "=&r" ((USItype) (sl))                                     \
361            : "rM" ((USItype) (ah)),                                     \
362              "rM" ((USItype) (bh)),                                     \
363              "rM" ((USItype) (al)),                                     \
364              "rM" ((USItype) (bl)))
/* HPPA umul_ppmm, only when _PA_RISC1_1 is defined.  "xmpyu" writes a
   UDItype result into the union member __f; the two halves are then read
   back through the overlaid struct, whose first member __w1 is stored in
   W1 and whose second member __w0 is stored in W0.  Without _PA_RISC1_1
   no umul_ppmm is defined here and only UMUL_TIME is set.  UDIV_TIME is
   set in both cases.  */
365 #if defined (_PA_RISC1_1)
366 #define umul_ppmm(w1, w0, u, v) \
367   do {                                                                  \
368     union                                                               \
369       {                                                                 \
370         UDItype __f;                                                    \
371         struct {USItype __w1, __w0;} __w1w0;                            \
372       } __t;                                                            \
373     __asm__ ("xmpyu %1,%2,%0"                                           \
374              : "=x" (__t.__f)                                           \
375              : "x" ((USItype) (u)),                                     \
376                "x" ((USItype) (v)));                                    \
377     (w1) = __t.__w1w0.__w1;                                             \
378     (w0) = __t.__w1w0.__w0;                                             \
379      } while (0)
380 #define UMUL_TIME 8
381 #else
382 #define UMUL_TIME 30
383 #endif
384 #define UDIV_TIME 40
/* HPPA count_leading_zeros.  As the assembler comments describe, this is
   a binary search over groups of 16, 8, 4 and 2 bits: each step tests
   whether the upper group of the remaining value is zero; if it is not,
   the group is shifted down and the add is skipped, otherwise the group
   width is added to the running count.  The count starts at 1 and the
   last extracted bit is subtracted from it.  X is tied to the scratch
   output __tmp through the "1" matching constraint, so the asm works on
   a copy rather than on the caller's object.  */
385 #define count_leading_zeros(count, x) \
386   do {                                                                  \
387     USItype __tmp;                                                      \
388     __asm__ (                                                           \
389        "ldi             1,%0\n"                                         \
390 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
391 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
392 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
393 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
394 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
395 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
396 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
397 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
398 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
399 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
400 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
401 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
402 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
403 "       sub             %0,%1,%0                ; Subtract it.\n"       \
404         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
405   } while (0)
406 #endif
407
408 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
409 #if !defined (__zarch__)
/* S/390 signed multiply and divide, variant used when __zarch__ is not
   defined.  Both macros overlay a DItype with a struct of two USItype
   halves, __h first and __l second.  smul_ppmm runs "lr" then "mr" on
   that double-word operand (%N0 presumably names the second register of
   the pair holding operand 0 -- confirm against the GCC s390 operand
   modifiers) and returns __h in XH and __l in XL.  sdiv_qrnnd loads N1
   into __h and N0 into __l, runs "dr", and returns __l as the quotient
   and __h as the remainder.  */
410 #define smul_ppmm(xh, xl, m0, m1) \
411   do {                                                                  \
412     union {DItype __ll;                                                 \
413            struct {USItype __h, __l;} __i;                              \
414           } __x;                                                        \
415     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
416              : "=&r" (__x.__ll)                                         \
417              : "r" (m0), "r" (m1));                                     \
418     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
419   } while (0)
420 #define sdiv_qrnnd(q, r, n1, n0, d) \
421   do {                                                                  \
422     union {DItype __ll;                                                 \
423            struct {USItype __h, __l;} __i;                              \
424           } __x;                                                        \
425     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
426     __asm__ ("dr %0,%2"                                                 \
427              : "=r" (__x.__ll)                                          \
428              : "0" (__x.__ll), "r" (d));                                \
429     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
430   } while (0)
431 #else
/* S/390 signed multiply and divide, variant used when __zarch__ is
   defined.  Instead of a double-word union, two local register variables
   are pinned to registers "0" and "1" and the instruction names %r0
   explicitly.  smul_ppmm places M0 in __r1, runs "mr", and returns __r0
   in XH and __r1 in XL.  sdiv_qrnnd places N1 in __r0 and N0 in __r1,
   runs "dr", and returns __r1 as the quotient and __r0 as the
   remainder.  */
432 #define smul_ppmm(xh, xl, m0, m1) \
433   do {                                                                  \
434     register SItype __r0 __asm__ ("0");                                 \
435     register SItype __r1 __asm__ ("1") = (m0);                          \
436                                                                         \
437     __asm__ ("mr\t%%r0,%3"                                              \
438              : "=r" (__r0), "=r" (__r1)                                 \
439              : "r"  (__r1),  "r" (m1));                                 \
440     (xh) = __r0; (xl) = __r1;                                           \
441   } while (0)
442
443 #define sdiv_qrnnd(q, r, n1, n0, d) \
444   do {                                                                  \
445     register SItype __r0 __asm__ ("0") = (n1);                          \
446     register SItype __r1 __asm__ ("1") = (n0);                          \
447                                                                         \
448     __asm__ ("dr\t%%r0,%4"                                              \
449              : "=r" (__r0), "=r" (__r1)                                 \
450              : "r" (__r0), "r" (__r1), "r" (d));                        \
451     (q) = __r1; (r) = __r0;                                             \
452   } while (0)
453 #endif /* __zarch__ */
454 #endif
455
456 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* i386 two-word add and subtract.  The instructions are two-operand, so
   AH is tied to the SH output and AL to the SL output through the "0"
   and "1" matching constraints; BL is then combined into SL and BH into
   SH with the carry/borrow form of the instruction.  The "{a|b}" braces
   give the operand order for each assembler dialect.  add_ssaaaa marks
   its tied inputs commutative with "%"; sub_ddmmss does not.  No
   clobbers are declared.  */
457 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
458   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
459            : "=r" ((USItype) (sh)),                                     \
460              "=&r" ((USItype) (sl))                                     \
461            : "%0" ((USItype) (ah)),                                     \
462              "g" ((USItype) (bh)),                                      \
463              "%1" ((USItype) (al)),                                     \
464              "g" ((USItype) (bl)))
465 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
466   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
467            : "=r" ((USItype) (sh)),                                     \
468              "=&r" ((USItype) (sl))                                     \
469            : "0" ((USItype) (ah)),                                      \
470              "g" ((USItype) (bh)),                                      \
471              "1" ((USItype) (al)),                                      \
472              "g" ((USItype) (bl)))
/* i386 multiply and divide.  umul_ppmm ties U to the "a" output that
   becomes W0 (low word) and takes V as a register or memory operand; the
   high word W1 comes back in the "d" output.  udiv_qrnnd ties N0 to the
   "a" output and N1 to the "d" output before the divide; afterwards the
   quotient Q is read from "a" and the remainder R from "d".  The general
   udiv_qrnnd contract stated at the top of the file (N1 less than the
   divisor) applies.  */
473 #define umul_ppmm(w1, w0, u, v) \
474   __asm__ ("mul{l} %3"                                                  \
475            : "=a" ((USItype) (w0)),                                     \
476              "=d" ((USItype) (w1))                                      \
477            : "%0" ((USItype) (u)),                                      \
478              "rm" ((USItype) (v)))
479 #define udiv_qrnnd(q, r, n1, n0, dv) \
480   __asm__ ("div{l} %4"                                                  \
481            : "=a" ((USItype) (q)),                                      \
482              "=d" ((USItype) (r))                                       \
483            : "0" ((USItype) (n0)),                                      \
484              "1" ((USItype) (n1)),                                      \
485              "rm" ((USItype) (dv)))
/* i386 bit counting via the GCC int builtins, plus timing constants.
   COUNT_LEADING_ZEROS_0 is not defined here, so per the description at
   the top of the file count_leading_zeros is undefined for X == 0.  */
486 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
487 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
488 #define UMUL_TIME 40
489 #define UDIV_TIME 40
490 #endif /* 80x86 */
491
492 #if defined (__x86_64__) && W_TYPE_SIZE == 64
/* x86-64 two-word add and subtract on UDItype words.  AH is tied to the
   SH output and AL to the SL output through the "0" and "1" matching
   constraints; BL is combined into SL and BH into SH with the
   carry/borrow form of the instruction.  The second operands use the
   "rme" constraint.  The "{a|b}" braces give the operand order for each
   assembler dialect.  No clobbers are declared.  */
493 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
494   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
495            : "=r" ((UDItype) (sh)),                                     \
496              "=&r" ((UDItype) (sl))                                     \
497            : "%0" ((UDItype) (ah)),                                     \
498              "rme" ((UDItype) (bh)),                                    \
499              "%1" ((UDItype) (al)),                                     \
500              "rme" ((UDItype) (bl)))
501 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
502   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
503            : "=r" ((UDItype) (sh)),                                     \
504              "=&r" ((UDItype) (sl))                                     \
505            : "0" ((UDItype) (ah)),                                      \
506              "rme" ((UDItype) (bh)),                                    \
507              "1" ((UDItype) (al)),                                      \
508              "rme" ((UDItype) (bl)))
/* x86-64 multiply and divide on UDItype words.  umul_ppmm ties U to the
   "a" output that becomes W0 (low word); the high word W1 comes back in
   the "d" output.  udiv_qrnnd ties N0 to "a" and N1 to "d" before the
   divide; afterwards the quotient Q is read from "a" and the remainder R
   from "d".  The general udiv_qrnnd contract stated at the top of the
   file (N1 less than the divisor) applies.  */
509 #define umul_ppmm(w1, w0, u, v) \
510   __asm__ ("mul{q} %3"                                                  \
511            : "=a" ((UDItype) (w0)),                                     \
512              "=d" ((UDItype) (w1))                                      \
513            : "%0" ((UDItype) (u)),                                      \
514              "rm" ((UDItype) (v)))
515 #define udiv_qrnnd(q, r, n1, n0, dv) \
516   __asm__ ("div{q} %4"                                                  \
517            : "=a" ((UDItype) (q)),                                      \
518              "=d" ((UDItype) (r))                                       \
519            : "0" ((UDItype) (n0)),                                      \
520              "1" ((UDItype) (n1)),                                      \
521              "rm" ((UDItype) (dv)))
/* x86-64 bit counting via the GCC long long builtins, plus timing
   constants.  COUNT_LEADING_ZEROS_0 is not defined here, so per the
   description at the top of the file count_leading_zeros is undefined
   for X == 0.  */
522 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
523 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
524 #define UMUL_TIME 40
525 #define UDIV_TIME 40
526 #endif /* x86_64 */
527
528 #if defined (__i960__) && W_TYPE_SIZE == 32
/* i960 umul_ppmm, written as a GNU statement expression.  "emul" writes
   a UDItype into the union member __ll; the overlaid struct lists __l
   first and __h second, and these are stored in W0 and W1
   respectively.  */
529 #define umul_ppmm(w1, w0, u, v) \
530   ({union {UDItype __ll;                                                \
531            struct {USItype __l, __h;} __i;                              \
532           } __xx;                                                       \
533   __asm__ ("emul        %2,%1,%0"                                       \
534            : "=d" (__xx.__ll)                                           \
535            : "%dI" ((USItype) (u)),                                     \
536              "dI" ((USItype) (v)));                                     \
537   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* i960 __umulsidi3: the same "emul" instruction as umul_ppmm, but the
   UDItype result __w is the value of the statement expression instead of
   being split into two words.  */
538 #define __umulsidi3(u, v) \
539   ({UDItype __w;                                                        \
540     __asm__ ("emul      %2,%1,%0"                                       \
541              : "=d" (__w)                                               \
542              : "%dI" ((USItype) (u)),                                   \
543                "dI" ((USItype) (v)));                                   \
544     __w; })
545 #endif /* __i960__ */
546
547 #if defined (__ia64) && W_TYPE_SIZE == 64
/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction,
   (sh,sl) = (ah,al) - (bh,bl).  A borrow out of the high word is lost.

   This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   Every input is read exactly once into a UWtype local before any output
   is written, so arguments with side effects are evaluated a single time
   and an output may name the same object as an input.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __sub_ah = (ah), __sub_al = (al);                            \
    UWtype __sub_bh = (bh), __sub_bl = (bl);                            \
    if (__sub_al < __sub_bl)                                            \
      (sh) = __sub_ah - __sub_bh - 1;                                   \
    else                                                                \
      (sh) = __sub_ah - __sub_bh;                                       \
    (sl) = __sub_al - __sub_bl;                                         \
  } while (0)
562
/* ia64 umul_ppmm.  All four operands use the "f" constraint.  "xma.hu"
   produces PH and "xma.l" produces PL from the same two inputs, each
   with f0 as the third source operand.  PH is an earlyclobber output
   because both inputs are read again by the second instruction.  */
563 /* Do both product parts in assembly, since that gives better code with
564    all gcc versions.  Some callers will just use the upper part, and in
565    that situation we waste an instruction, but not any cycles.  */
566 #define umul_ppmm(ph, pl, m0, m1)                                       \
567   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
568            : "=&f" (ph), "=f" (pl)                                      \
569            : "f" (m0), "f" (m1))
/* ia64 count_leading_zeros.  X is evaluated once into _x.  The two asm
   statements ("mux1 ... @rev" followed by "czx1.l" on -_y | _y) yield
   _a, from which a byte-granular shift count _c = (_a - 1) << 3 is
   derived -- presumably selecting the highest non-zero byte; confirm
   against the Itanium instruction reference.  After shifting _x down by
   _c, the position inside the remaining value is refined in steps of 4,
   2 and 1 bits, and COUNT is W_TYPE_SIZE - 1 - _c.
   COUNT_LEADING_ZEROS_0 is not defined here.  */
570 #define count_leading_zeros(count, x)                                   \
571   do {                                                                  \
572     UWtype _x = (x), _y, _a, _c;                                        \
573     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
574     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
575     _c = (_a - 1) << 3;                                                 \
576     _x >>= _c;                                                          \
577     if (_x >= 1 << 4)                                                   \
578       _x >>= 4, _c += 4;                                                \
579     if (_x >= 1 << 2)                                                   \
580       _x >>= 2, _c += 2;                                                \
581     _c += _x >> 1;                                                      \
582     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
583   } while (0)
584 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
585    based, and we don't need a special case for x==0 here */
/* count_trailing_zeros (count, x): X is copied once into __ctz_x; the asm
   applies popcnt to (__ctz_x - 1) & ~__ctz_x, i.e. to the mask of bits
   below the lowest set bit of X, and writes the result to COUNT.  */
586 #define count_trailing_zeros(count, x)                                  \
587   do {                                                                  \
588     UWtype __ctz_x = (x);                                               \
589     __asm__ ("popcnt %0 = %1"                                           \
590              : "=r" (count)                                             \
591              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
592   } while (0)
593 #define UMUL_TIME 14
594 #endif
595
/* LoongArch: the bit-counting macros are mapped onto compiler builtins
   chosen by W_TYPE_SIZE -- the int builtins (__builtin_clz/__builtin_ctz)
   for 32 and the long long builtins (__builtin_clzll/__builtin_ctzll) for
   64.  COUNT_LEADING_ZEROS_0 is defined as the word size in both cases.
   Nothing is defined here for any other W_TYPE_SIZE.  */
596 #ifdef __loongarch__
597 # if W_TYPE_SIZE == 32
598 #  define count_leading_zeros(count, x)  ((count) = __builtin_clz (x))
599 #  define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
600 #  define COUNT_LEADING_ZEROS_0 32
601 # elif W_TYPE_SIZE == 64
602 #  define count_leading_zeros(count, x)  ((count) = __builtin_clzll (x))
603 #  define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
604 #  define COUNT_LEADING_ZEROS_0 64
605 # endif
606 #endif
607
608 #if defined (__M32R__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition.  AH and AL are
   tied to the SH and SL output registers ("0" and "1").  After the cmp,
   addx adds BL into the low register and a second addx adds BH into the
   high register.  SL is early-clobber because it is written before BH is
   read, and the condition bit ("cbit") is declared clobbered.  */
609 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
610   /* The cmp clears the condition bit.  */ \
611   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
612            : "=r" ((USItype) (sh)),                                     \
613              "=&r" ((USItype) (sl))                                     \
614            : "0" ((USItype) (ah)),                                      \
615              "r" ((USItype) (bh)),                                      \
616              "1" ((USItype) (al)),                                      \
617              "r" ((USItype) (bl))                                       \
618            : "cbit")
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction, the mirror of
   the add_ssaaaa sequence with subx in place of addx.  AH/AL are tied to
   the SH/SL registers, BL is subtracted from the low register first and BH
   from the high register second; SL is early-clobber and "cbit" is
   declared clobbered.  */
619 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
620   /* The cmp clears the condition bit.  */ \
621   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
622            : "=r" ((USItype) (sh)),                                     \
623              "=&r" ((USItype) (sl))                                     \
624            : "0" ((USItype) (ah)),                                      \
625              "r" ((USItype) (bh)),                                      \
626              "1" ((USItype) (al)),                                      \
627              "r" ((USItype) (bl))                                       \
628            : "cbit")
629 #endif /* __M32R__ */
630
631 #if defined (__mc68000__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition.  add%.l adds BL
   into the data register holding AL (which becomes SL), then addx%.l adds
   BH into the data register holding AH (which becomes SH).  The "%0"/"%1"
   constraints tie AH/AL to the outputs and mark each as commutative with
   the operand that follows it; SL is early-clobber because it is written
   before BH is read.  */
632 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
633   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
634            : "=d" ((USItype) (sh)),                                     \
635              "=&d" ((USItype) (sl))                                     \
636            : "%0" ((USItype) (ah)),                                     \
637              "d" ((USItype) (bh)),                                      \
638              "%1" ((USItype) (al)),                                     \
639              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction.  sub%.l
   subtracts BL from the register holding AL (result SL), then subx%.l
   subtracts BH from the register holding AH (result SH).  Unlike
   add_ssaaaa the tied inputs use plain "0"/"1", without the commutative
   "%" marker.  */
640 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
641   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
642            : "=d" ((USItype) (sh)),                                     \
643              "=&d" ((USItype) (sl))                                     \
644            : "0" ((USItype) (ah)),                                      \
645              "d" ((USItype) (bh)),                                      \
646              "1" ((USItype) (al)),                                      \
647              "g" ((USItype) (bl)))
648
649 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
650 #if (defined (__mc68020__) && !defined (__mc68060__))
/* umul_ppmm (w1, w0, u, v): a single mulu%.l whose destination is the
   register pair %1:%0, i.e. W1:W0.  U is tied to W0's register (and marked
   commutative with V); V may be a data register, memory or an immediate
   ("dmi").  */
651 #define umul_ppmm(w1, w0, u, v) \
652   __asm__ ("mulu%.l %3,%1:%0"                                           \
653            : "=d" ((USItype) (w0)),                                     \
654              "=d" ((USItype) (w1))                                      \
655            : "%0" ((USItype) (u)),                                      \
656              "dmi" ((USItype) (v)))
657 #define UMUL_TIME 45
/* udiv_qrnnd (q, r, n1, n0, d): a single divu%.l on the register pair
   %1:%0.  On entry N0 is tied to the Q register and N1 to the R register;
   on exit those registers hold Q and R.  D may be a data register, memory
   or an immediate ("dmi").  */
658 #define udiv_qrnnd(q, r, n1, n0, d) \
659   __asm__ ("divu%.l %4,%1:%0"                                           \
660            : "=d" ((USItype) (q)),                                      \
661              "=d" ((USItype) (r))                                       \
662            : "0" ((USItype) (n0)),                                      \
663              "1" ((USItype) (n1)),                                      \
664              "dmi" ((USItype) (d)))
665 #define UDIV_TIME 90
/* sdiv_qrnnd (q, r, n1, n0, d): same operand layout as udiv_qrnnd, but
   using divs%.l instead of divu%.l.  */
666 #define sdiv_qrnnd(q, r, n1, n0, d) \
667   __asm__ ("divs%.l %4,%1:%0"                                           \
668            : "=d" ((USItype) (q)),                                      \
669              "=d" ((USItype) (r))                                       \
670            : "0" ((USItype) (n0)),                                      \
671              "1" ((USItype) (n1)),                                      \
672              "dmi" ((USItype) (d)))
673
674 #elif defined (__mcoldfire__) /* not mc68020 */
675
/* umul_ppmm (xh, xl, a, b), ColdFire variant: an inline multi-instruction
   sequence that loads A and B into d0/d1, forms four partial products with
   mulu on swapped/unswapped halves in d1-d4, and combines them with
   add/swap plus a carry fix-up ("jcc 1f" skips adding #65536 to d1).  XL
   is stored from d2 and XH from d0 at the end.  d0-d4 are used as scratch
   and are all declared clobbered.  The low half of d0 is cleared with
   clr%.w here.  */
676 #define umul_ppmm(xh, xl, a, b) \
677   __asm__ ("| Inlined umul_ppmm\n"                                      \
678            "    move%.l %2,%/d0\n"                                      \
679            "    move%.l %3,%/d1\n"                                      \
680            "    move%.l %/d0,%/d2\n"                                    \
681            "    swap    %/d0\n"                                         \
682            "    move%.l %/d1,%/d3\n"                                    \
683            "    swap    %/d1\n"                                         \
684            "    move%.w %/d2,%/d4\n"                                    \
685            "    mulu    %/d3,%/d4\n"                                    \
686            "    mulu    %/d1,%/d2\n"                                    \
687            "    mulu    %/d0,%/d3\n"                                    \
688            "    mulu    %/d0,%/d1\n"                                    \
689            "    move%.l %/d4,%/d0\n"                                    \
690            "    clr%.w  %/d0\n"                                         \
691            "    swap    %/d0\n"                                         \
692            "    add%.l  %/d0,%/d2\n"                                    \
693            "    add%.l  %/d3,%/d2\n"                                    \
694            "    jcc     1f\n"                                           \
695            "    add%.l  %#65536,%/d1\n"                                 \
696            "1:  swap    %/d2\n"                                         \
697            "    moveq   %#0,%/d0\n"                                     \
698            "    move%.w %/d2,%/d0\n"                                    \
699            "    move%.w %/d4,%/d2\n"                                    \
700            "    move%.l %/d2,%1\n"                                      \
701            "    add%.l  %/d1,%/d0\n"                                    \
702            "    move%.l %/d0,%0"                                        \
703            : "=g" ((USItype) (xh)),                                     \
704              "=g" ((USItype) (xl))                                      \
705            : "g" ((USItype) (a)),                                       \
706              "g" ((USItype) (b))                                        \
707            : "d0", "d1", "d2", "d3", "d4")
708 #define UMUL_TIME 100
709 #define UDIV_TIME 400
710 #else /* not ColdFire */
711 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
/* umul_ppmm (xh, xl, a, b), fallback used when neither the mulu%.l form
   nor the ColdFire form above is selected.  The instruction sequence is
   the same as the ColdFire one except that the low half of d0 is cleared
   with "eor%.w d0,d0" rather than clr%.w.  A and B are read into d0/d1
   first, XL is stored from d2 and XH from d0 at the end, and d0-d4 are
   declared clobbered.  */
712 #define umul_ppmm(xh, xl, a, b) \
713   __asm__ ("| Inlined umul_ppmm\n"                                      \
714            "    move%.l %2,%/d0\n"                                      \
715            "    move%.l %3,%/d1\n"                                      \
716            "    move%.l %/d0,%/d2\n"                                    \
717            "    swap    %/d0\n"                                         \
718            "    move%.l %/d1,%/d3\n"                                    \
719            "    swap    %/d1\n"                                         \
720            "    move%.w %/d2,%/d4\n"                                    \
721            "    mulu    %/d3,%/d4\n"                                    \
722            "    mulu    %/d1,%/d2\n"                                    \
723            "    mulu    %/d0,%/d3\n"                                    \
724            "    mulu    %/d0,%/d1\n"                                    \
725            "    move%.l %/d4,%/d0\n"                                    \
726            "    eor%.w  %/d0,%/d0\n"                                    \
727            "    swap    %/d0\n"                                         \
728            "    add%.l  %/d0,%/d2\n"                                    \
729            "    add%.l  %/d3,%/d2\n"                                    \
730            "    jcc     1f\n"                                           \
731            "    add%.l  %#65536,%/d1\n"                                 \
732            "1:  swap    %/d2\n"                                         \
733            "    moveq   %#0,%/d0\n"                                     \
734            "    move%.w %/d2,%/d0\n"                                    \
735            "    move%.w %/d4,%/d2\n"                                    \
736            "    move%.l %/d2,%1\n"                                      \
737            "    add%.l  %/d1,%/d0\n"                                    \
738            "    move%.l %/d0,%0"                                        \
739            : "=g" ((USItype) (xh)),                                     \
740              "=g" ((USItype) (xl))                                      \
741            : "g" ((USItype) (a)),                                       \
742              "g" ((USItype) (b))                                        \
743            : "d0", "d1", "d2", "d3", "d4")
744 #define UMUL_TIME 100
745 #define UDIV_TIME 400
746
747 #endif /* not mc68020 */
748
749 /* The '020, '030, '040 and '060 have bitfield insns.
750    cpu32 disguises as a 68020, but lacks them.  */
751 #if defined (__mc68020__) && !defined (__mcpu32__)
/* count_leading_zeros (count, x): one bfffo instruction on X, writing its
   result to COUNT.  Both the bit-field offset and width in the template
   come from the same constant operand, 0 (printed with %b2).  X may live
   in a data register or offsettable memory ("od").  */
752 #define count_leading_zeros(count, x) \
753   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
754            : "=d" ((USItype) (count))                                   \
755            : "od" ((USItype) (x)), "n" (0))
756 /* Some ColdFire architectures have a ff1 instruction supported via
757    __builtin_clz. */
758 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
/* count_leading_zeros (count, x): delegate to __builtin_clz and advertise
   32 as the COUNT_LEADING_ZEROS_0 value.  */
759 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
760 #define COUNT_LEADING_ZEROS_0 32
761 #endif
762 #endif /* mc68000 */
763
764 #if defined (__m88000__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition in two
   instructions: addu.co forms SL from AL and BL, then addu.ci forms SH
   from AH and BH (the .co/.ci suffixes are presumably carry-out and
   carry-in -- confirm against the m88k manual).  All inputs accept a
   register or a "J" constant, AH and AL are marked commutative with the
   operand that follows, and SL is early-clobber because it is written
   before AH/BH are read.  */
765 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
766   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
767            : "=r" ((USItype) (sh)),                                     \
768              "=&r" ((USItype) (sl))                                     \
769            : "%rJ" ((USItype) (ah)),                                    \
770              "rJ" ((USItype) (bh)),                                     \
771              "%rJ" ((USItype) (al)),                                    \
772              "rJ" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction with the same
   shape as add_ssaaaa: subu.co forms SL from AL and BL, then subu.ci forms
   SH from AH and BH.  None of the inputs is marked commutative here.  */
773 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
774   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
775            : "=r" ((USItype) (sh)),                                     \
776              "=&r" ((USItype) (sl))                                     \
777            : "rJ" ((USItype) (ah)),                                     \
778              "rJ" ((USItype) (bh)),                                     \
779              "rJ" ((USItype) (al)),                                     \
780              "rJ" ((USItype) (bl)))
/* count_leading_zeros (count, x): run ff1 on X into a temporary and store
   the temporary XOR 31 in COUNT.  COUNT_LEADING_ZEROS_0 is deliberately
   given as 63, as the original "sic" remark flags.  */
781 #define count_leading_zeros(count, x) \
782   do {                                                                  \
783     USItype __cbtmp;                                                    \
784     __asm__ ("ff1 %0,%1"                                                \
785              : "=r" (__cbtmp)                                           \
786              : "r" ((USItype) (x)));                                    \
787     (count) = __cbtmp ^ 31;                                             \
788   } while (0)
789 #define COUNT_LEADING_ZEROS_0 63 /* sic */
/* When __mc88110__ is defined, umul_ppmm and udiv_qrnnd are implemented
   with the double-word mulu.d/divu.d instructions; otherwise only the
   timing constants are defined in this conditional.

   umul_ppmm: mulu.d writes a UDItype that is viewed through a union as the
   word pair {__h, __l}; WH gets __h and WL gets __l.

   udiv_qrnnd: N1/N0 are packed into the same union as {__h, __l}, divu.d
   produces the quotient, and the remainder is then computed in C as
   N0 - quotient * D.  Note that N0 and D are each evaluated twice.  */
790 #if defined (__mc88110__)
791 #define umul_ppmm(wh, wl, u, v) \
792   do {                                                                  \
793     union {UDItype __ll;                                                \
794            struct {USItype __h, __l;} __i;                              \
795           } __xx;                                                       \
796     __asm__ ("mulu.d    %0,%1,%2"                                       \
797              : "=r" (__xx.__ll)                                         \
798              : "r" ((USItype) (u)),                                     \
799                "r" ((USItype) (v)));                                    \
800     (wh) = __xx.__i.__h;                                                \
801     (wl) = __xx.__i.__l;                                                \
802   } while (0)
803 #define udiv_qrnnd(q, r, n1, n0, d) \
804   ({union {UDItype __ll;                                                \
805            struct {USItype __h, __l;} __i;                              \
806           } __xx;                                                       \
807   USItype __q;                                                          \
808   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
809   __asm__ ("divu.d %0,%1,%2"                                            \
810            : "=r" (__q)                                                 \
811            : "r" (__xx.__ll),                                           \
812              "r" ((USItype) (d)));                                      \
813   (r) = (n0) - __q * (d); (q) = __q; })
814 #define UMUL_TIME 5
815 #define UDIV_TIME 25
816 #else
817 #define UMUL_TIME 17
818 #define UDIV_TIME 150
819 #endif /* __mc88110__ */
820 #endif /* __m88000__ */
821
#if defined (__mn10300__)
/* MN10300 / AM33 support.  The asm statements are spelled __asm__, like
   everywhere else in this header, so that the macros still expand
   correctly when the including translation unit is compiled with a strict
   -std= option (where the plain "asm" keyword is not recognized).  */
# if defined (__AM33__)
/* AM33: leading zeros come from __builtin_clz, and the four-operand
   mulu/mul forms write both result words (W1 and W0) directly.  */
#  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Without AM33: two-operand mulu/mul preceded by two nops.  U is tied to
   the W0 register (and commutative with V); W1 comes back through the "z"
   constraint.  */
#  define umul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v)               \
    __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* add_ssaaaa / sub_ddmmss: assemble the two double words in DWunion
   temporaries, let the compiler do the double-word add or subtract, and
   split the result back into (sh, sl).  Each argument is evaluated once.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll + __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
  do {                                          \
    DWunion __s, __a, __b;                      \
    __a.s.low = (al); __a.s.high = (ah);        \
    __b.s.low = (bl); __b.s.high = (bh);        \
    __s.ll = __a.ll - __b.ll;                   \
    (sl) = __s.s.low; (sh) = __s.s.high;        \
  } while (0)
/* udiv_qrnnd / sdiv_qrnnd: a single divu/div.  NL is tied to the Q
   register and NH to the R register; D is passed in a "D" register.  */
# define udiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d)            \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
858
859 #if defined (__mips__) && W_TYPE_SIZE == 32
/* umul_ppmm (w1, w0, u, v): 32x32->64 unsigned multiply written in plain C.
   Both operands are narrowed to USItype, the product is formed in a
   UDItype, and W1/W0 receive its upper and lower 32 bits (W1 is assigned
   first).  U and V are each evaluated once.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __prod = (UDItype) (USItype) (u);                           \
    __prod *= (USItype) (v);                                            \
    (w1) = (USItype) (__prod >> 32);                                    \
    (w0) = (USItype) __prod;                                            \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100
868
/* Only when __mips is 32 or 64 and __mips16 is not defined:
   count_leading_zeros is mapped onto __builtin_clz and
   COUNT_LEADING_ZEROS_0 is 32.  */
869 #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
870 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
871 #define COUNT_LEADING_ZEROS_0 32
872 #endif
873 #endif /* __mips__ */
874
875 /* FIXME: We should test _IBMR2 here when we add assembly support for the
876    system vendor compilers.
877    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
878    enough, since that hits ARM and m68k too.  */
879 #if (defined (_ARCH_PPC)        /* AIX */                               \
880      || defined (__powerpc__)   /* gcc */                               \
881      || defined (__POWERPC__)   /* BEOS */                              \
882      || defined (__ppc__)       /* Darwin */                            \
883      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
884      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
885          && CPU_FAMILY == PPC)                                                \
886      ) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition.  The low words
   are always added first with the carry-recording "add...c" instruction
   (the %I modifier presumably selects the immediate mnemonic when BL
   matches the "I" constraint -- confirm against the rs6000 operand
   modifiers).  The high-word instruction depends on what is known about
   BH at compile time:
     BH == 0         -> addze on AH
     BH == all ones  -> addme on AH
     otherwise       -> adde of AH and BH.
   SL is early-clobber because it is written before the high-word inputs
   are read.  */
887 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
888   do {                                                                  \
889     if (__builtin_constant_p (bh) && (bh) == 0)                         \
890       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
891              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
892     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
893       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
894              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
895     else                                                                \
896       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
897              : "=r" (sh), "=&r" (sl)                                    \
898              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
899   } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction.  The low
   words are always subtracted first with the carry-recording "subf...c"
   instruction; the high-word instruction is then picked from compile-time
   knowledge of AH and BH:
     AH == 0         -> subfze on BH
     AH == all ones  -> subfme on BH
     BH == 0         -> addme on AH
     BH == all ones  -> addze on AH
     otherwise       -> subfe of BH from AH.
   In the constant cases only the non-constant high word is passed to the
   asm.  */
900 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
901   do {                                                                  \
902     if (__builtin_constant_p (ah) && (ah) == 0)                         \
903       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
904                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
905     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
906       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
907                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
908     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
909       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
910                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
911     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
912       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
913                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
914     else                                                                \
915       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
916                : "=r" (sh), "=&r" (sl)                                  \
917                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
918   } while (0)
/* count_leading_zeros (count, x): a single cntlzw from X into COUNT.
   COUNT_LEADING_ZEROS_0 is given as 32.  */
919 #define count_leading_zeros(count, x) \
920   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
921 #define COUNT_LEADING_ZEROS_0 32
922 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
923   || defined (__ppc__)                                                    \
924   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
925   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
926          && CPU_FAMILY == PPC)
/* umul_ppmm (ph, pl, m0, m1): unsigned 32x32->64 multiply.  PH receives
   the high word from mulhwu and PL the low word from an ordinary C
   multiply.  M0 and M1 are evaluated exactly once: the asm reads the
   USItype copies __m0/__m1 rather than the macro arguments, so argument
   side effects are not repeated and an argument wider than a word is
   truncated before it reaches the "r" constraints.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed 32x32->64 multiply.  PH receives the
   high word from mulhw and PL the low word from an ordinary C multiply.
   As in umul_ppmm, the asm operands are the SItype copies __m0/__m1, so
   each macro argument is evaluated exactly once and is converted to
   SItype before use.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
942 #endif
943 #endif /* 32-bit POWER architecture variants.  */
944
945 /* We should test _IBMR2 here when we add assembly support for the system
946    vendor compilers.  */
947 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl), 64-bit words: the low words are
   added first with the carry-recording "add...c" instruction, then the
   high word uses addze when BH is the compile-time constant 0, addme when
   BH is the compile-time constant ~(UDItype) 0, and adde otherwise.  SL is
   early-clobber because it is written before the high-word inputs are
   read.  */
948 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
949   do {                                                                  \
950     if (__builtin_constant_p (bh) && (bh) == 0)                         \
951       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
952              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
953     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
954       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
955              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
956     else                                                                \
957       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
958              : "=r" (sh), "=&r" (sl)                                    \
959              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
960   } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl), 64-bit words: the low words are
   subtracted first with the carry-recording "subf...c" instruction; the
   high-word instruction is chosen from compile-time knowledge of AH/BH:
     AH == 0               -> subfze on BH
     AH == ~(UDItype) 0    -> subfme on BH
     BH == 0               -> addme on AH
     BH == ~(UDItype) 0    -> addze on AH
     otherwise             -> subfe of BH from AH.  */
961 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
962   do {                                                                  \
963     if (__builtin_constant_p (ah) && (ah) == 0)                         \
964       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
965                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
966     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
967       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
968                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
969     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
970       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
971                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
972     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
973       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
974                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
975     else                                                                \
976       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
977                : "=r" (sh), "=&r" (sl)                                  \
978                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
979   } while (0)
/* count_leading_zeros (count, x): a single cntlzd from X into COUNT.
   COUNT_LEADING_ZEROS_0 is given as 64.  */
980 #define count_leading_zeros(count, x) \
981   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
982 #define COUNT_LEADING_ZEROS_0 64
/* umul_ppmm (ph, pl, m0, m1): unsigned 64x64->128 multiply.  PH receives
   the high word from mulhdu and PL the low word from an ordinary C
   multiply.  The asm reads the UDItype copies __m0/__m1 rather than the
   macro arguments, so M0 and M1 are evaluated exactly once and are
   converted to UDItype before use.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed 64x64->128 multiply.  PH receives the
   high word from mulhd and PL the low word from an ordinary C multiply.
   The asm reads the DItype copies __m0/__m1 rather than the macro
   arguments, so M0 and M1 are evaluated exactly once and are converted to
   DItype before use.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
998 #endif /* 64-bit PowerPC.  */
999
1000 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition in two
   instructions: "a" adds BL into the register holding AL (result SL), then
   "ae" adds BH into the register holding AH (result SH).  AH/AL are tied
   to the outputs and marked commutative; SL is early-clobber because it is
   written before BH is read.  */
1001 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1002   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
1003            : "=r" ((USItype) (sh)),                                     \
1004              "=&r" ((USItype) (sl))                                     \
1005            : "%0" ((USItype) (ah)),                                     \
1006              "r" ((USItype) (bh)),                                      \
1007              "%1" ((USItype) (al)),                                     \
1008              "r" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction in two
   instructions: "s" subtracts BL from the register holding AL (result SL),
   then "se" subtracts BH from the register holding AH (result SH).  */
1009 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1010   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
1011            : "=r" ((USItype) (sh)),                                     \
1012              "=&r" ((USItype) (sl))                                     \
1013            : "0" ((USItype) (ah)),                                      \
1014              "r" ((USItype) (bh)),                                      \
1015              "1" ((USItype) (al)),                                      \
1016              "r" ((USItype) (bl)))
/* umul_ppmm (ph, pl, m0, m1): M0 and M1 are copied once into USItype
   locals.  The asm clears r2, moves one operand to r10 with mts, issues
   sixteen "m r2,<other operand>" steps, then copies r2 to PH (cas) and
   r10 to PL (mfs).  Only r2 is listed as clobbered.  The C statement
   after the asm adds __m1 to PH when the top bit of __m0 is set and __m0
   when the top bit of __m1 is set (each mask is an arithmetic shift of the
   signed value by 31) -- presumably converting a signed product into the
   unsigned one; confirm against the ROMP instruction reference.  */
1017 #define umul_ppmm(ph, pl, m0, m1) \
1018   do {                                                                  \
1019     USItype __m0 = (m0), __m1 = (m1);                                   \
1020     __asm__ (                                                           \
1021        "s       r2,r2\n"                                                \
1022 "       mts     r10,%2\n"                                               \
1023 "       m       r2,%3\n"                                                \
1024 "       m       r2,%3\n"                                                \
1025 "       m       r2,%3\n"                                                \
1026 "       m       r2,%3\n"                                                \
1027 "       m       r2,%3\n"                                                \
1028 "       m       r2,%3\n"                                                \
1029 "       m       r2,%3\n"                                                \
1030 "       m       r2,%3\n"                                                \
1031 "       m       r2,%3\n"                                                \
1032 "       m       r2,%3\n"                                                \
1033 "       m       r2,%3\n"                                                \
1034 "       m       r2,%3\n"                                                \
1035 "       m       r2,%3\n"                                                \
1036 "       m       r2,%3\n"                                                \
1037 "       m       r2,%3\n"                                                \
1038 "       m       r2,%3\n"                                                \
1039 "       cas     %0,r2,r0\n"                                             \
1040 "       mfs     r10,%1"                                                 \
1041              : "=r" ((USItype) (ph)),                                   \
1042                "=r" ((USItype) (pl))                                    \
1043              : "%r" (__m0),                                             \
1044                 "r" (__m1)                                              \
1045              : "r2");                                                   \
1046     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1047              + (((SItype) __m1 >> 31) & __m0));                         \
1048   } while (0)
1049 #define UMUL_TIME 20
1050 #define UDIV_TIME 200
/* count_leading_zeros (count, x): when X >= 0x10000 the clz instruction is
   applied to X >> 16; otherwise it is applied to X itself and 16 is added
   to the result.  X is evaluated twice (once in the test, once in the
   selected asm operand), and COUNT is written by the asm and, in the else
   branch, then incremented.  */
1051 #define count_leading_zeros(count, x) \
1052   do {                                                                  \
1053     if ((x) >= 0x10000)                                                 \
1054       __asm__ ("clz     %0,%1"                                          \
1055                : "=r" ((USItype) (count))                               \
1056                : "r" ((USItype) (x) >> 16));                            \
1057     else                                                                \
1058       {                                                                 \
1059         __asm__ ("clz   %0,%1"                                          \
1060                  : "=r" ((USItype) (count))                             \
1061                  : "r" ((USItype) (x)));                                        \
1062         (count) += 16;                                                  \
1063       }                                                                 \
1064   } while (0)
1065 #endif
1066
1067 #if defined(__riscv)
1068 #ifdef __riscv_mul
/* Used when __riscv_mul is defined: both helpers are plain C multiplies.
   __umulsidi3 widens U to UDWtype (via UWtype) before multiplying by V as
   UWtype; __muluw3 multiplies A and B as UWtype, so its result is a single
   word.  */
1069 #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
1070 #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
1071 #else
/* MULUW3 is the asm text of the call used for a word multiply:
   "call __mulsi3" when __riscv_xlen is 32 and "call __muldi3" when it is
   64.  Any other __riscv_xlen is rejected at preprocessing time.  */
1072 #if __riscv_xlen == 32
1073   #define MULUW3 "call __mulsi3"
1074 #elif __riscv_xlen == 64
1075   #define MULUW3 "call __muldi3"
1076 #else
1077 #error unsupport xlen
1078 #endif /* __riscv_xlen */
/* __muluw3 (a, b): word multiply done by the call named in MULUW3.  The
   operands are pinned to a0 and a1 and the product is read back from a0.
   Only ra, a2 and a3 are declared clobbered: we rely on the fact that the
   called routine does not clobber the t-registers, which lets the compiler
   allocate registers better than it could around an ordinary call.
   __asm__ is used instead of asm so the macro also expands correctly under
   a strict -std= option, and A and B are parenthesized so that any
   expression can be passed.  */
#define __muluw3(a, b) \
  ({ \
    register UWtype __op0 __asm__ ("a0") = (a); \
    register UWtype __op1 __asm__ ("a1") = (b); \
    __asm__ volatile (MULUW3 \
                      : "+r" (__op0), "+r" (__op1) \
                      : \
                      : "ra", "a2", "a3"); \
    __op0; \
  })
1091 #endif /* __riscv_mul */
/* umul_ppmm (w1, w0, u, v): double-word product of U and V assembled from
   four half-word multiplies performed with __muluw3.  W1 receives the high
   word and W0 the low word.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    /* Split each multiplicand into its low and high half.  */ \
    UHWtype __u_lo = __ll_lowpart (u); \
    UHWtype __u_hi = __ll_highpart (u); \
    UHWtype __v_lo = __ll_lowpart (v); \
    UHWtype __v_hi = __ll_highpart (v); \
 \
    /* The four partial products.  */ \
    UWtype __p_ll = __muluw3 (__u_lo, __v_lo); \
    UWtype __p_lh = __muluw3 (__u_lo, __v_hi); \
    UWtype __p_hl = __muluw3 (__u_hi, __v_lo); \
    UWtype __p_hh = __muluw3 (__u_hi, __v_hi); \
 \
    /* Accumulate the middle column.  */ \
    __p_lh += __ll_highpart (__p_ll); /* cannot carry */ \
    __p_lh += __p_hl; /* may wrap around */ \
    if (__p_lh < __p_hl) /* it wrapped: propagate the carry */ \
      __p_hh += __ll_B; \
 \
    (w1) = __p_hh + __ll_highpart (__p_lh); \
    (w0) = __ll_lowpart (__p_lh) * __ll_B + __ll_lowpart (__p_ll); \
  } while (0)
1115 #endif /* __riscv */
1116
1117 #if defined(__sh__) && W_TYPE_SIZE == 32
1118 #ifndef __sh1__
/* SH (not SH1) unsigned multiply producing (w1:w0) = u * v.
   dmulu.l multiplies the two register operands; the two sts instructions
   then copy MACL into w0 and MACH into w1.  Both MAC registers are listed
   as clobbers.  UMUL_TIME is the cost estimate for this sequence.  */
1119 #define umul_ppmm(w1, w0, u, v) \
1120   __asm__ (                                                             \
1121        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1122            : "=r<" ((USItype)(w1)),                                     \
1123              "=r<" ((USItype)(w0))                                      \
1124            : "r" ((USItype)(u)),                                        \
1125              "r" ((USItype)(v))                                         \
1126            : "macl", "mach")
1127 #define UMUL_TIME 5
1128 #endif
1129
1130 /* This is the same algorithm as __udiv_qrnnd_c.  */
/* Setting UDIV_NEEDS_NORMALIZATION to 1 marks the SH udiv_qrnnd defined
   below as requiring a normalized divisor (NOTE(review): the exact
   normalization contract is defined outside this chunk -- confirm).  */
1131 #define UDIV_NEEDS_NORMALIZATION 1
1132
1133 #ifdef __FDPIC__
1134 /* FDPIC needs a special version of the asm fragment to extract the
1135    code address from the function descriptor. __udiv_qrnnd_16 is
1136    assumed to be local and not to use the GOT, so loading r12 is
1137    not needed. */
/* udiv_qrnnd (q, r, n1, n0, d) for SH FDPIC: divides by calling the
   hidden helper __udiv_qrnnd_16 twice.  Before each call the code address
   is loaded from the descriptor pointed to by operand 5 into r2
   ("mov.l @%5,r2"), then "jsr @r2" is issued with the following
   instruction in its delay slot.  The two results left in r1 are merged
   into q with swap.w/or.  n1 shares its register with r (matching
   constraint "1"); the register map is given in the inline comment.
   r1, r2, r4, r5, r6, pr and the T bit are declared clobbered.  */
1138 #define udiv_qrnnd(q, r, n1, n0, d) \
1139   do {                                                                  \
1140     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1141                         __attribute__ ((visibility ("hidden")));        \
1142     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1143     __asm__ (                                                           \
1144         "mov%M4 %4,r5\n"                                                \
1145 "       swap.w  %3,r4\n"                                                \
1146 "       swap.w  r5,r6\n"                                                \
1147 "       mov.l   @%5,r2\n"                                               \
1148 "       jsr     @r2\n"                                                  \
1149 "       shll16  r6\n"                                                   \
1150 "       swap.w  r4,r4\n"                                                \
1151 "       mov.l   @%5,r2\n"                                               \
1152 "       jsr     @r2\n"                                                  \
1153 "       swap.w  r1,%0\n"                                                \
1154 "       or      r1,%0"                                                  \
1155         : "=r" (q), "=&z" (r)                                           \
1156         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1157         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1158   } while (0)
1159 #else
/* udiv_qrnnd (q, r, n1, n0, d) for SH without FDPIC: same sequence as
   the FDPIC variant, except that the address of the hidden helper
   __udiv_qrnnd_16 (operand 5) is used directly as the jsr target.  The
   helper is called twice and the two results left in r1 are merged into
   q with swap.w/or.  n1 shares its register with r (matching constraint
   "1").  r1, r2, r4, r5, r6, pr and the T bit are declared clobbered.  */
1160 #define udiv_qrnnd(q, r, n1, n0, d) \
1161   do {                                                                  \
1162     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1163                         __attribute__ ((visibility ("hidden")));        \
1164     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1165     __asm__ (                                                           \
1166         "mov%M4 %4,r5\n"                                                \
1167 "       swap.w %3,r4\n"                                                 \
1168 "       swap.w r5,r6\n"                                                 \
1169 "       jsr @%5\n"                                                      \
1170 "       shll16 r6\n"                                                    \
1171 "       swap.w r4,r4\n"                                                 \
1172 "       jsr @%5\n"                                                      \
1173 "       swap.w r1,%0\n"                                                 \
1174 "       or r1,%0"                                                       \
1175         : "=r" (q), "=&z" (r)                                           \
1176         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1177         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1178   } while (0)
1179 #endif /* __FDPIC__  */
1180
/* Cost estimate for the SH udiv_qrnnd above.  */
1181 #define UDIV_TIME 80
1182
/* Two-word subtraction (sh:sl) = (ah:al) - (bh:bl) for SH.
   clrt clears the T bit, then subc subtracts the low words and the second
   subc subtracts the high words together with the borrow left in T.
   ah/al are tied to the output registers (matching constraints "0"/"1");
   T is declared clobbered.  */
1183 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1184   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1185            : "=r" (sh), "=r" (sl)                                       \
1186            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1187
1188 #endif /* __sh__ */
1189
1190 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1191     && W_TYPE_SIZE == 32
/* Two-word addition (sh:sl) = (ah:al) + (bh:bl) for 32-bit SPARC.
   addcc adds the low words and sets the carry; addx adds the high words
   plus that carry.  sl is early-clobber because it is written before the
   high-word inputs are read.  The '%' modifier marks each ah/bh and al/bl
   pair as commutative, which is valid for addition.  */
1192 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1193   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1194            : "=r" ((USItype) (sh)),                                     \
1195              "=&r" ((USItype) (sl))                                     \
1196            : "%rJ" ((USItype) (ah)),                                    \
1197              "rI" ((USItype) (bh)),                                     \
1198              "%rJ" ((USItype) (al)),                                    \
1199              "rI" ((USItype) (bl))                                      \
1200            __CLOBBER_CC)
/* Two-word subtraction (sh:sl) = (ah:al) - (bh:bl) for 32-bit SPARC.
   subcc subtracts the low words and sets the borrow; subx subtracts the
   high words minus that borrow.  Unlike add_ssaaaa, the minuend operands
   use plain "rJ" (no '%'), since the operands of a subtraction must not
   be interchanged.  */
1201 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1202   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1203            : "=r" ((USItype) (sh)),                                     \
1204              "=&r" ((USItype) (sl))                                     \
1205            : "rJ" ((USItype) (ah)),                                     \
1206              "rI" ((USItype) (bh)),                                     \
1207              "rJ" ((USItype) (al)),                                     \
1208              "rI" ((USItype) (bl))                                      \
1209            __CLOBBER_CC)
1210 #if defined (__sparc_v9__)
/* Unsigned multiply (w1:w0) = u * v for 32-bit code on SPARC v9.
   umul writes its result to a variable pinned to %g1; srlx then shifts
   that register right by 32 to obtain w1, and w0 is assigned from the
   %g1 variable (converted to USItype).  */
1211 #define umul_ppmm(w1, w0, u, v) \
1212   do {                                                                  \
1213     register USItype __g1 asm ("g1");                                   \
1214     __asm__ ("umul\t%2,%3,%1\n\t"                                       \
1215              "srlx\t%1, 32, %0"                                         \
1216              : "=r" ((USItype) (w1)),                                   \
1217                "=r" (__g1)                                              \
1218              : "r" ((USItype) (u)),                                     \
1219                "r" ((USItype) (v)));                                    \
1220     (w0) = __g1;                                                        \
1221   } while (0)
/* Division of (n1:n0) by d for 32-bit code on SPARC v9.
   n1 is moved into %y, udiv divides n0 (with %y as the upper word) by d
   to give the quotient, and the remainder is recovered as
   n0 - quotient * d using umul and sub.  Both outputs are early-clobber
   because they are written while inputs are still needed.  */
1222 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1223   __asm__ ("mov\t%2,%%y\n\t"                                            \
1224            "udiv\t%3,%4,%0\n\t"                                         \
1225            "umul\t%0,%4,%1\n\t"                                         \
1226            "sub\t%3,%1,%1"                                              \
1227            : "=&r" ((USItype) (__q)),                                   \
1228              "=&r" ((USItype) (__r))                                    \
1229            : "r" ((USItype) (__n1)),                                    \
1230              "r" ((USItype) (__n0)),                                    \
1231              "r" ((USItype) (__d)))
1232 #else
1233 #if defined (__sparc_v8__)
/* Unsigned multiply (w1:w0) = u * v for SPARC v8.
   umul leaves the low word in w0; the high word is then read from %y
   into w1.  */
1234 #define umul_ppmm(w1, w0, u, v) \
1235   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1236            : "=r" ((USItype) (w1)),                                     \
1237              "=r" ((USItype) (w0))                                      \
1238            : "r" ((USItype) (u)),                                       \
1239              "r" ((USItype) (v)))
/* Division of (n1:n0) by d for SPARC v8.
   Same sequence as the v9 variant (load %y with n1, udiv, then
   remainder = n0 - quotient * d via umul/sub), with three nops inserted
   between the write to %y and the udiv that consumes it.  */
1240 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1241   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1242            : "=&r" ((USItype) (__q)),                                   \
1243              "=&r" ((USItype) (__r))                                    \
1244            : "r" ((USItype) (__n1)),                                    \
1245              "r" ((USItype) (__n0)),                                    \
1246              "r" ((USItype) (__d)))
1247 #else
1248 #if defined (__sparclite__)
1249 /* This has hardware multiply but not divide.  It also has two additional
1250    instructions scan (ffs from high bit) and divscc.  */
/* Unsigned multiply (w1:w0) = u * v for SPARClite: umul leaves the low
   word in w0 and the high word is read from %y into w1 (the same
   sequence as the SPARC v8 variant).  */
1251 #define umul_ppmm(w1, w0, u, v) \
1252   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1253            : "=r" ((USItype) (w1)),                                     \
1254              "=r" ((USItype) (w0))                                      \
1255            : "r" ((USItype) (u)),                                       \
1256              "r" ((USItype) (v)))
/* Division of (n1:n0) by d for SPARClite using the divscc step
   instruction.
   n1 is written to %y and "tst %g0" sets the condition codes from zero.
   A chain of 32 divscc steps follows: the first takes n0, the
   intermediate ones accumulate in %g1, and the last writes the quotient
   to q.  The remainder is read back from %y; the annulled branch
   "bl,a 1f" executes the delay-slot "add r,d,r" only when the branch is
   taken, correcting the remainder by adding d.
   %g1 and the condition codes are declared clobbered.  UDIV_TIME is the
   cost estimate for this sequence.  */
1257 #define udiv_qrnnd(q, r, n1, n0, d) \
1258   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1259 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1260 "       tst     %%g0\n"                                                 \
1261 "       divscc  %3,%4,%%g1\n"                                           \
1262 "       divscc  %%g1,%4,%%g1\n"                                         \
1263 "       divscc  %%g1,%4,%%g1\n"                                         \
1264 "       divscc  %%g1,%4,%%g1\n"                                         \
1265 "       divscc  %%g1,%4,%%g1\n"                                         \
1266 "       divscc  %%g1,%4,%%g1\n"                                         \
1267 "       divscc  %%g1,%4,%%g1\n"                                         \
1268 "       divscc  %%g1,%4,%%g1\n"                                         \
1269 "       divscc  %%g1,%4,%%g1\n"                                         \
1270 "       divscc  %%g1,%4,%%g1\n"                                         \
1271 "       divscc  %%g1,%4,%%g1\n"                                         \
1272 "       divscc  %%g1,%4,%%g1\n"                                         \
1273 "       divscc  %%g1,%4,%%g1\n"                                         \
1274 "       divscc  %%g1,%4,%%g1\n"                                         \
1275 "       divscc  %%g1,%4,%%g1\n"                                         \
1276 "       divscc  %%g1,%4,%%g1\n"                                         \
1277 "       divscc  %%g1,%4,%%g1\n"                                         \
1278 "       divscc  %%g1,%4,%%g1\n"                                         \
1279 "       divscc  %%g1,%4,%%g1\n"                                         \
1280 "       divscc  %%g1,%4,%%g1\n"                                         \
1281 "       divscc  %%g1,%4,%%g1\n"                                         \
1282 "       divscc  %%g1,%4,%%g1\n"                                         \
1283 "       divscc  %%g1,%4,%%g1\n"                                         \
1284 "       divscc  %%g1,%4,%%g1\n"                                         \
1285 "       divscc  %%g1,%4,%%g1\n"                                         \
1286 "       divscc  %%g1,%4,%%g1\n"                                         \
1287 "       divscc  %%g1,%4,%%g1\n"                                         \
1288 "       divscc  %%g1,%4,%%g1\n"                                         \
1289 "       divscc  %%g1,%4,%%g1\n"                                         \
1290 "       divscc  %%g1,%4,%%g1\n"                                         \
1291 "       divscc  %%g1,%4,%%g1\n"                                         \
1292 "       divscc  %%g1,%4,%0\n"                                           \
1293 "       rd      %%y,%1\n"                                               \
1294 "       bl,a 1f\n"                                                      \
1295 "       add     %1,%4,%1\n"                                             \
1296 "1:     ! End of inline udiv_qrnnd"                                     \
1297            : "=r" ((USItype) (q)),                                      \
1298              "=r" ((USItype) (r))                                       \
1299            : "r" ((USItype) (n1)),                                      \
1300              "r" ((USItype) (n0)),                                      \
1301              "rI" ((USItype) (d))                                       \
1302            : "g1" __AND_CLOBBER_CC)
1303 #define UDIV_TIME 37
/* count_leading_zeros for SPARClite: a single "scan x,1,count"
   instruction stores its result in COUNT.  */
1304 #define count_leading_zeros(count, x) \
1305   do {                                                                  \
1306   __asm__ ("scan %1,1,%0"                                               \
1307            : "=r" ((USItype) (count))                                   \
1308            : "r" ((USItype) (x)));                                      \
1309   } while (0)
1310 /* Early sparclites return 63 for an argument of 0, but they warn that future
1311    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1312    undefined.  */
1313 #else
1314 /* SPARC without integer multiplication and divide instructions.
1315    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* Unsigned multiply (w1:w0) = u * v built from the mulscc step
   instruction.
   u is written to %y; %o5 is set to (v >> 31, arithmetic) & u, i.e. u
   when the top bit of v is set and 0 otherwise; andcc clears %g1 and the
   condition codes.  32 mulscc steps with v follow, then one final step
   with 0.  The high word is %g1 plus the %o5 correction, and the low
   word is read from %y.
   %g1, %o5 and the condition codes are declared clobbered; the first
   four instructions must stay in order, as their inline comments say.  */
1316 #define umul_ppmm(w1, w0, u, v) \
1317   __asm__ ("! Inlined umul_ppmm\n"                                      \
1318 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1319 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1320 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1321 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1322 "       mulscc  %%g1,%3,%%g1\n"                                         \
1323 "       mulscc  %%g1,%3,%%g1\n"                                         \
1324 "       mulscc  %%g1,%3,%%g1\n"                                         \
1325 "       mulscc  %%g1,%3,%%g1\n"                                         \
1326 "       mulscc  %%g1,%3,%%g1\n"                                         \
1327 "       mulscc  %%g1,%3,%%g1\n"                                         \
1328 "       mulscc  %%g1,%3,%%g1\n"                                         \
1329 "       mulscc  %%g1,%3,%%g1\n"                                         \
1330 "       mulscc  %%g1,%3,%%g1\n"                                         \
1331 "       mulscc  %%g1,%3,%%g1\n"                                         \
1332 "       mulscc  %%g1,%3,%%g1\n"                                         \
1333 "       mulscc  %%g1,%3,%%g1\n"                                         \
1334 "       mulscc  %%g1,%3,%%g1\n"                                         \
1335 "       mulscc  %%g1,%3,%%g1\n"                                         \
1336 "       mulscc  %%g1,%3,%%g1\n"                                         \
1337 "       mulscc  %%g1,%3,%%g1\n"                                         \
1338 "       mulscc  %%g1,%3,%%g1\n"                                         \
1339 "       mulscc  %%g1,%3,%%g1\n"                                         \
1340 "       mulscc  %%g1,%3,%%g1\n"                                         \
1341 "       mulscc  %%g1,%3,%%g1\n"                                         \
1342 "       mulscc  %%g1,%3,%%g1\n"                                         \
1343 "       mulscc  %%g1,%3,%%g1\n"                                         \
1344 "       mulscc  %%g1,%3,%%g1\n"                                         \
1345 "       mulscc  %%g1,%3,%%g1\n"                                         \
1346 "       mulscc  %%g1,%3,%%g1\n"                                         \
1347 "       mulscc  %%g1,%3,%%g1\n"                                         \
1348 "       mulscc  %%g1,%3,%%g1\n"                                         \
1349 "       mulscc  %%g1,%3,%%g1\n"                                         \
1350 "       mulscc  %%g1,%3,%%g1\n"                                         \
1351 "       mulscc  %%g1,%3,%%g1\n"                                         \
1352 "       mulscc  %%g1,%3,%%g1\n"                                         \
1353 "       mulscc  %%g1,%3,%%g1\n"                                         \
1354 "       mulscc  %%g1,0,%%g1\n"                                          \
1355 "       add     %%g1,%%o5,%0\n"                                         \
1356 "       rd      %%y,%1"                                                 \
1357            : "=r" ((USItype) (w1)),                                     \
1358              "=r" ((USItype) (w0))                                      \
1359            : "%rI" ((USItype) (u)),                                     \
1360              "r" ((USItype) (v))                                                \
1361            : "g1", "o5" __AND_CLOBBER_CC)
1362 #define UMUL_TIME 39            /* 39 instructions */
1363 /* It's quite necessary to add this much assembler for the sparc.
1364    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Shift-and-subtract division of (n1:n0) by d for SPARC without divide
   hardware.
   Operand 0 starts as n0 and ends as the quotient; operand 1 starts as
   n1 and ends as the remainder (matching constraints "0" and "1");
   operand 2 is d.  %g1 is the loop counter, initialized to 32.  Each
   pass shifts a quotient bit into operand 0 with addxcc/addcc, and the
   final xnor with 0 complements the accumulated bits.  The instructions
   indented by one extra space sit in branch delay slots.
   %g1 and the condition codes are declared clobbered.  */
1365 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1366   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1367 "       mov     32,%%g1\n"                                              \
1368 "       subcc   %1,%2,%%g0\n"                                           \
1369 "1:     bcs     5f\n"                                                   \
1370 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1371 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1372 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1373 "       subcc   %%g1,1,%%g1\n"                                          \
1374 "2:     bne     1b\n"                                                   \
1375 "        subcc  %1,%2,%%g0\n"                                           \
1376 "       bcs     3f\n"                                                   \
1377 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1378 "       b       3f\n"                                                   \
1379 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1380 "4:     sub     %1,%2,%1\n"                                             \
1381 "5:     addxcc  %1,%1,%1\n"                                             \
1382 "       bcc     2b\n"                                                   \
1383 "        subcc  %%g1,1,%%g1\n"                                          \
1384 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1385 "       bne     4b\n"                                                   \
1386 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1387 "       sub     %1,%2,%1\n"                                             \
1388 "3:     xnor    %0,0,%0\n"                                              \
1389 "       ! End of inline udiv_qrnnd"                                     \
1390            : "=&r" ((USItype) (__q)),                                   \
1391              "=&r" ((USItype) (__r))                                    \
1392            : "r" ((USItype) (__d)),                                     \
1393              "1" ((USItype) (__n1)),                                    \
1394              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1395 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1396 #endif /* __sparclite__ */
1397 #endif /* __sparc_v8__ */
1398 #endif /* __sparc_v9__ */
1399 #endif /* sparc32 */
1400
1401 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1402     && W_TYPE_SIZE == 64
/* Two-word addition (sh:sl) = (ah:al) + (bh:bl) for 64-bit SPARC.
   addcc adds the low words and sets the condition codes; the plain add
   forms the high-word sum without touching them.  movcs on %xcc then
   sets __carry (initialized to 0) to 1 if the low addition carried, and
   the last add folds it into the high word.  The '%' modifier marks the
   ah/bh and al/bl pairs as commutative, which is valid for addition.  */
1403 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1404   do {                                                                  \
1405     UDItype __carry = 0;                                                \
1406     __asm__ ("addcc\t%r5,%6,%1\n\t"                                     \
1407              "add\t%r3,%4,%0\n\t"                                       \
1408              "movcs\t%%xcc, 1, %2\n\t"                                  \
1409              "add\t%0, %2, %0"                                          \
1410              : "=r" ((UDItype)(sh)),                                    \
1411                "=&r" ((UDItype)(sl)),                                   \
1412                "+r" (__carry)                                           \
1413              : "%rJ" ((UDItype)(ah)),                                   \
1414                "rI" ((UDItype)(bh)),                                    \
1415                "%rJ" ((UDItype)(al)),                                   \
1416                "rI" ((UDItype)(bl))                                     \
1417              __CLOBBER_CC);                                             \
1418   } while (0)
1419
/* Two-word subtraction (sh:sl) = (ah:al) - (bh:bl) for 64-bit SPARC.
   subcc subtracts the low words and sets the condition codes; the plain
   sub forms the high-word difference without touching them.  movcs on
   %xcc then sets __carry (initialized to 0) to 1 if the low subtraction
   borrowed, and the last sub removes it from the high word.

   The minuend operands use "rJ" without the '%' (commutative) modifier:
   subtraction is not commutative, so the compiler must never be allowed
   to interchange ah with bh or al with bl to satisfy the constraints.
   This matches the 32-bit SPARC sub_ddmmss.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
1436
/* Unsigned multiply (wh:wl) = u * v for 64-bit SPARC.
   The two-word product is assembled from mulx products of the 32-bit
   halves of u and v (extracted with srl ...,0 and srlx ...,32), with
   addcc/movcc used to propagate the carry between partial sums.  Four
   scratch registers (tmp1..tmp4) are early-clobber outputs; wl is also
   early-clobber because it is written while inputs are still live.
   NOTE(review): the temporaries tmp1..tmp4 are not reserved-prefix
   names, so an argument expression that mentions an identifier of the
   same name would be captured -- confirm no caller does this.
   UMUL_TIME and UDIV_TIME are the cost estimates for this target.  */
1437 #define umul_ppmm(wh, wl, u, v)                                         \
1438   do {                                                                  \
1439           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1440           __asm__ __volatile__ (                                        \
1441                    "srl %7,0,%3\n\t"                                    \
1442                    "mulx %3,%6,%1\n\t"                                  \
1443                    "srlx %6,32,%2\n\t"                                  \
1444                    "mulx %2,%3,%4\n\t"                                  \
1445                    "sllx %4,32,%5\n\t"                                  \
1446                    "srl %6,0,%3\n\t"                                    \
1447                    "sub %1,%5,%5\n\t"                                   \
1448                    "srlx %5,32,%5\n\t"                                  \
1449                    "addcc %4,%5,%4\n\t"                                 \
1450                    "srlx %7,32,%5\n\t"                                  \
1451                    "mulx %3,%5,%3\n\t"                                  \
1452                    "mulx %2,%5,%5\n\t"                                  \
1453                    "sethi %%hi(0x80000000),%2\n\t"                      \
1454                    "addcc %4,%3,%4\n\t"                                 \
1455                    "srlx %4,32,%4\n\t"                                  \
1456                    "add %2,%2,%2\n\t"                                   \
1457                    "movcc %%xcc,%%g0,%2\n\t"                            \
1458                    "addcc %5,%4,%5\n\t"                                 \
1459                    "sllx %3,32,%3\n\t"                                  \
1460                    "add %1,%3,%1\n\t"                                   \
1461                    "add %5,%2,%0"                                       \
1462            : "=r" ((UDItype)(wh)),                                      \
1463              "=&r" ((UDItype)(wl)),                                     \
1464              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1465            : "r" ((UDItype)(u)),                                        \
1466              "r" ((UDItype)(v))                                         \
1467            __CLOBBER_CC);                                               \
1468   } while (0)
1469 #define UMUL_TIME 96
1470 #define UDIV_TIME 230
1471 #endif /* sparc64 */
1472
1473 #if defined (__vax__) && W_TYPE_SIZE == 32
/* Two-word addition (sh:sl) = (ah:al) + (bh:bl) for VAX.
   ah and al are tied to the outputs (matching constraints "0"/"1"), so
   addl2 adds bl into sl in place and adwc adds bh plus the carry into
   sh.  */
1474 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1475   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1476            : "=g" ((USItype) (sh)),                                     \
1477              "=&g" ((USItype) (sl))                                     \
1478            : "%0" ((USItype) (ah)),                                     \
1479              "g" ((USItype) (bh)),                                      \
1480              "%1" ((USItype) (al)),                                     \
1481              "g" ((USItype) (bl)))
/* Two-word subtraction (sh:sl) = (ah:al) - (bh:bl) for VAX.
   ah and al are tied to the outputs (matching constraints "0"/"1"), so
   subl2 subtracts bl from sl in place and sbwc subtracts bh and the
   borrow from sh.  */
1482 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1483   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1484            : "=g" ((USItype) (sh)),                                     \
1485              "=&g" ((USItype) (sl))                                     \
1486            : "0" ((USItype) (ah)),                                      \
1487              "g" ((USItype) (bh)),                                      \
1488              "1" ((USItype) (al)),                                      \
1489              "g" ((USItype) (bl)))
/* Unsigned multiply (xh:xl) = m0 * m1 for VAX.
   emul writes a 64-bit product into the union __xx, whose two USItype
   halves are then copied to xh and xl.  The high word is afterwards
   corrected by adding m1 when the top bit of m0 is set and m0 when the
   top bit of m1 is set (the arithmetic shift by 31 yields an all-ones or
   all-zeros mask), which converts a signed product into the unsigned
   one.  */
1490 #define umul_ppmm(xh, xl, m0, m1) \
1491   do {                                                                  \
1492     union {                                                             \
1493         UDItype __ll;                                                   \
1494         struct {USItype __l, __h;} __i;                                 \
1495       } __xx;                                                           \
1496     USItype __m0 = (m0), __m1 = (m1);                                   \
1497     __asm__ ("emul %1,%2,$0,%0"                                         \
1498              : "=r" (__xx.__ll)                                         \
1499              : "g" (__m0),                                              \
1500                "g" (__m1));                                             \
1501     (xh) = __xx.__i.__h;                                                \
1502     (xl) = __xx.__i.__l;                                                \
1503     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1504              + (((SItype) __m1 >> 31) & __m0));                         \
1505   } while (0)
/* Signed division of the two-word value (n1:n0) by d for VAX.
   n1 and n0 are packed into the high and low halves of a 64-bit union,
   which ediv divides by d, storing the quotient in q and the remainder
   in r.  */
1506 #define sdiv_qrnnd(q, r, n1, n0, d) \
1507   do {                                                                  \
1508     union {DItype __ll;                                                 \
1509            struct {SItype __l, __h;} __i;                               \
1510           } __xx;                                                       \
1511     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1512     __asm__ ("ediv %3,%2,%0,%1"                                         \
1513              : "=g" (q), "=g" (r)                                       \
1514              : "g" (__xx.__ll), "g" (d));                               \
1515   } while (0)
1516 #endif /* __vax__ */
1517
1518 #ifdef _TMS320C6X
/* Two-word addition (sh:sl) = (ah:al) + (bh:bl) for TMS320C6X.
   addu produces the sum of al and bl in the 64-bit temporary __ll.  Its
   low 32 bits become sl; its upper 32 bits (the carry out of the low
   words) are added to ah + bh in C to form sh.  */
1519 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1520   do                                                                    \
1521     {                                                                   \
1522       UDItype __ll;                                                     \
1523       __asm__ ("addu .l1 %1, %2, %0"                                    \
1524                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1525       (sl) = (USItype)__ll;                                             \
1526       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1527     }                                                                   \
1528   while (0)
1529
1530 #ifdef _TMS320C6400_PLUS
/* 32x32->64 unsigned product as a plain C expression.  U is converted to
   USItype and widened to UDItype before the multiplication.  Both
   arguments are parenthesized so that operator expressions such as
   __umulsidi3 (a + b, c) multiply the whole argument.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u) * (USItype)(v))
/* Unsigned multiply (w1:w0) = u * v for TMS320C6400+: form the full
   64-bit product in C, then hand back its upper and lower 32 bits.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UDItype __prod;                                                     \
    __prod = (UDItype) (USItype) (u) * (USItype) (v);                   \
    (w1) = (USItype) (__prod >> 32);    /* high word */                 \
    (w0) = (USItype) __prod;            /* low word */                  \
  } while (0)
1538 #endif  /* _TMS320C6400_PLUS */
1539
/* Bit counting for TMS320C6X is delegated to the compiler builtins;
   count_trailing_zeros is only provided when _TMS320C6400 is defined.
   UMUL_TIME and UDIV_TIME are the cost estimates for this target.  */
1540 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1541 #ifdef _TMS320C6400
1542 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1543 #endif
1544 #define UMUL_TIME 4
1545 #define UDIV_TIME 40
1546 #endif /* _TMS320C6X */
1547
1548 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1549 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1550    to expand builtin functions depending on what configuration features
1551    are available.  This avoids library calls when the operation can be
1552    performed in-line.  */
/* Unsigned multiply (w1:w0) = u * v for Xtensa: let the compiler expand
   __builtin_umulsidi3 and split the double-word result through a
   DWunion.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __prod;                                                     \
    __prod.ll = __builtin_umulsidi3 (u, v);                             \
    w1 = __prod.s.high;         /* upper word of the product */         \
    w0 = __prod.s.low;          /* lower word of the product */         \
  } while (0)
/* The remaining Xtensa operations map directly onto compiler builtins:
   the widening multiply, and the leading/trailing zero counts, which
   store their result in COUNT.  */
1560 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1561 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1562 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1563 #endif /* __xtensa__ */
1564
1565 #if defined xstormy16
1566 extern UHItype __stormy16_count_leading_zeros (UHItype);
1567 #define count_leading_zeros(count, x)                                   \
1568   do                                                                    \
1569     {                                                                   \
1570       UHItype size;                                                     \
1571                                                                         \
1572       /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
1573       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)           \
1574         {                                                               \
1575           UHItype c;                                                    \
1576                                                                         \
1577           c = __clzhi2 ((x) >> (size - 16));                            \
1578           (count) += c;                                                 \
1579           if (c != 16)                                                  \
1580             break;                                                      \
1581         }                                                               \
1582     }                                                                   \
1583   while (0)
1584 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1585 #endif
1586
1587 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Two-word addition (sh:sl) = (ah:al) + (bh:bl) for Z8000 with 16-bit
   words.  ah and al are tied to the outputs (matching constraints
   "0"/"1"); add adds bl into sl and adc adds bh plus the carry into
   sh.  */
1588 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1589   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1590            : "=r" ((unsigned int)(sh)),                                 \
1591              "=&r" ((unsigned int)(sl))                                 \
1592            : "%0" ((unsigned int)(ah)),                                 \
1593              "r" ((unsigned int)(bh)),                                  \
1594              "%1" ((unsigned int)(al)),                                 \
1595              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss for the Z8000 (inside the `__z8000__ && W_TYPE_SIZE == 16'
   conditional).

   Outputs: SH (operand 0) and SL (operand 1).
   Inputs:  AH (operand 2), BH (operand 3), AL (operand 4), BL (operand 5).

   The template issues `sub' on operands 1 and 5 (the low words) and then
   `sbc' on operands 0 and 3 (the high words); presumably `sbc' consumes
   the borrow left by the `sub'.  AH and AL are tied to the output
   registers with the matching constraints "0" and "1".  There is no `%'
   modifier on them here, since the operands of a subtraction cannot be
   swapped.  SL is early-clobber (`&'): it is written by the first
   instruction while operand 3 is still needed by the second.

   NOTE(review): the operands are written as casts to unsigned int; confirm
   that the compilers used for this target still accept a cast as an asm
   output operand.  */
1596 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1597   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1598            : "=r" ((unsigned int)(sh)),                                 \
1599              "=&r" ((unsigned int)(sl))                                 \
1600            : "0" ((unsigned int)(ah)),                                  \
1601              "r" ((unsigned int)(bh)),                                  \
1602              "1" ((unsigned int)(al)),                                  \
1603              "rQR" ((unsigned int)(bl)))
/* umul_ppmm for the Z8000 (inside the `__z8000__ && W_TYPE_SIZE == 16'
   conditional): XH and XL receive the two words of the product of M0 and
   M1.

   M0 and M1 are copied into the locals __m0 and __m1, so each is evaluated
   once.  The `mult' instruction is given __m0 tied to operand 1 (the __l
   half of the union, via the commutative matching constraint "%1") and
   __m1 as operand 3; afterwards the __h and __l members of __xx are copied
   to XH and XL.  The __ll member of the union is never referenced by name
   in this macro.

   The final statement adds __m1 to XH when bit 15 of __m0 is set, and __m0
   when bit 15 of __m1 is set (the `(signed int) v >> 15' terms act as
   all-ones/all-zeros masks, assuming an arithmetic right shift of negative
   values).  That is the usual adjustment for turning the high word of a
   signed product into that of an unsigned one, so `mult' is presumably a
   signed multiply -- TODO confirm against the Z8000 instruction set
   reference.  */
1604 #define umul_ppmm(xh, xl, m0, m1) \
1605   do {                                                                  \
1606     union {long int __ll;                                               \
1607            struct {unsigned int __h, __l;} __i;                         \
1608           } __xx;                                                       \
1609     unsigned int __m0 = (m0), __m1 = (m1);                              \
1610     __asm__ ("mult      %S0,%H3"                                        \
1611              : "=r" (__xx.__i.__h),                                     \
1612                "=r" (__xx.__i.__l)                                      \
1613              : "%1" (__m0),                                             \
1614                "rQR" (__m1));                                           \
1615     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1616     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1617              + (((signed int) __m1 >> 15) & __m0));                     \
1618   } while (0)
1619 #endif /* __z8000__ */
1620
1621 #endif /* __GNUC__ */
1622
1623 /* If this machine has no inline assembler, use C macros.  */
1624
#if !defined (add_ssaaaa)
/* Portable C fallback: (SH, SL) = (AH, AL) + (BH, BL), where each pair is
   a two-word number with the high word first.  Any carry out of the high
   word is discarded.

   The low words are added first; the sum wrapped around exactly when it
   is smaller than AL, and that comparison result (0 or 1) is the carry
   added into the high word.

   Every argument is evaluated exactly once: AL is latched in a local
   because it is needed both for the addition and for the carry test.  SH
   and SL are written only after all inputs have been read, so an output
   may name the same object as an input.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __xal = (al);                                                \
    UWtype __x = __xal + (bl);                                          \
    (sh) = (ah) + (bh) + (__x < __xal);                                 \
    (sl) = __x;                                                         \
  } while (0)
#endif
1634
#if !defined (sub_ddmmss)
/* Portable C fallback: (SH, SL) = (AH, AL) - (BH, BL), where each pair is
   a two-word number with the high word first.  Any borrow out of the high
   word is discarded.

   The low words are subtracted first; the difference wrapped around
   exactly when it is larger than AL, and that comparison result (0 or 1)
   is the borrow taken from the high word.

   Every argument is evaluated exactly once: AL is latched in a local
   because it is needed both for the subtraction and for the borrow test.
   SH and SL are written only after all inputs have been read, so an
   output may name the same object as an input.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __xal = (al);                                                \
    UWtype __x = __xal - (bl);                                          \
    (sh) = (ah) - (bh) - (__x > __xal);                                 \
    (sl) = __x;                                                         \
  } while (0)
#endif
1644
1645 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1646    smul_ppmm.  */
1647 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* Unsigned double-word product built on the signed primitive smul_ppmm.
   W0 is handed straight to smul_ppmm.  The high word it returns is then
   adjusted for the unsigned interpretation: V is added when the top bit
   of U is set, and U is added when the top bit of V is set.  Negating
   the shifted-out top bit yields the all-ones or all-zeros mask that
   selects each addend.  U and V are each evaluated once.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __um_a = (u);                                                \
    UWtype __um_b = (v);                                                \
    UWtype __um_hi;                                                     \
    UWtype __um_fix_a;                                                  \
    UWtype __um_fix_b;                                                  \
                                                                        \
    smul_ppmm (__um_hi, w0, __um_a, __um_b);                            \
    __um_fix_a = -(__um_a >> (W_TYPE_SIZE - 1)) & __um_b;               \
    __um_fix_b = -(__um_b >> (W_TYPE_SIZE - 1)) & __um_a;               \
    (w1) = __um_hi + __um_fix_a + __um_fix_b;                           \
  } while (0)
1656 #endif
1657
1658 /* If we still don't have umul_ppmm, define it using plain C.  */
#if !defined (umul_ppmm)
/* Plain C fallback: (W1, W0) = U * V, the full two-word product of two
   single words.

   Each factor is split into half words with __ll_lowpart/__ll_highpart
   and the four partial products are combined schoolbook-style.  The two
   middle products are summed in __x1, and a carry out of that sum is
   worth __ll_B in the high word.

   U and V are latched into locals first, so each argument is evaluated
   exactly once, matching the smul_ppmm-based definition of this macro.
   W1 and W0 are written only after all inputs have been read.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __xm0 = (u), __xm1 = (v);                                    \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (__xm0);                                        \
    __uh = __ll_highpart (__xm0);                                       \
    __vl = __ll_lowpart (__xm1);                                        \
    __vh = __ll_highpart (__xm1);                                       \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x1 = (UWtype) __ul * __vh;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
    __x3 = (UWtype) __uh * __vh;                                        \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
1684
#ifndef __umulsidi3
/* Default __umulsidi3: a statement expression whose value is the `ll'
   member of a DWunion after umul_ppmm has stored the product of U and V
   into its `s.high' and `s.low' members.  */
#define __umulsidi3(u, v)                                               \
  ({                                                                    \
    DWunion __umulsidi3_prod;                                           \
    umul_ppmm (__umulsidi3_prod.s.high, __umulsidi3_prod.s.low, u, v);  \
    __umulsidi3_prod.ll;                                                \
  })
#endif
1691
1692 /* Define this unconditionally, so it can be used for debugging.  */
/* Plain C two-word by one-word division: the two-word number (N1, N0) is
   divided by D, Q receives the quotient and R the remainder.

   The quotient is produced one half word at a time.  Each step divides by
   the high half of D to get an estimate, multiplies the estimate by the
   low half of D, and then corrects the estimate downwards at most twice.

   NOTE(review): where udiv_qrnnd falls back to this routine the header
   also sets UDIV_NEEDS_NORMALIZATION to 1, so callers are presumably
   expected to pass a D whose most significant bit is set (and N1 < D so
   that the quotient fits in one word) -- confirm against the contract
   documented at the top of the file.

   N1, N0 and D are latched into locals, so each argument is evaluated
   exactly once.  Q and R are written only after all inputs have been
   read, so an output may name the same object as an input.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __udc_n1 = (n1), __udc_n0 = (n0), __udc_d = (d);             \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (__udc_d);                                     \
    __d0 = __ll_lowpart (__udc_d);                                      \
                                                                        \
    /* High half of the quotient.  */                                   \
    __r1 = __udc_n1 % __d1;                                             \
    __q1 = __udc_n1 / __d1;                                             \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__udc_n0);                    \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __udc_d;                                        \
        /* i.e. we didn't get carry when adding to __r1 */              \
        if (__r1 >= __udc_d)                                            \
          if (__r1 < __m)                                               \
            __q1--, __r1 += __udc_d;                                    \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low half of the quotient, same scheme on the running remainder.  */\
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udc_n0);                     \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __udc_d;                                        \
        if (__r0 >= __udc_d)                                            \
          if (__r0 < __m)                                               \
            __q0--, __r0 += __udc_d;                                    \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1729
1730 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1731    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if defined (sdiv_qrnnd) && !defined (udiv_qrnnd)
/* udiv_qrnnd for targets that only provide sdiv_qrnnd: the work is
   delegated to the external function __udiv_w_sdiv, whose return value
   is stored in Q and which hands back a second result through its first
   (pointer) argument; that value is stored in R.  */
#define udiv_qrnnd(q, r, nh, nl, d)                                     \
  do {                                                                  \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);     \
    UWtype __udiv_rem;                                                  \
                                                                        \
    (q) = __udiv_w_sdiv (&__udiv_rem, nh, nl, d);                       \
    (r) = __udiv_rem;                                                   \
  } while (0)
#endif
1741
1742 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#ifndef udiv_qrnnd
/* Nothing above supplied udiv_qrnnd: make it an alias for the plain C
   routine and record that UDIV_NEEDS_NORMALIZATION is 1 for it.  */
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1747
#ifndef count_leading_zeros
/* Table-driven C fallback.  A shift amount is chosen so that the value
   shifted right by it is a valid __clz_tab index; COUNT is then
   W_TYPE_SIZE minus the sum of that table entry and the shift.

   For words of at most 32 bits the shift is one of 0, __BITS4, 2*__BITS4
   or 3*__BITS4, picked by comparing against the matching powers of two.
   For wider words the bytes are scanned from the most significant end
   until a non-zero one is found (or the lowest byte is reached).  */
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    UWtype __clzc_word = (x);                                           \
    UWtype __clzc_shift;                                                \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        __clzc_shift = W_TYPE_SIZE - 8;                                 \
        while (__clzc_shift > 0                                         \
               && ((__clzc_word >> __clzc_shift) & 0xff) == 0)          \
          __clzc_shift -= 8;                                            \
      }                                                                 \
    else if (__clzc_word < ((UWtype) 1 << __BITS4))                     \
      __clzc_shift = 0;                                                 \
    else if (__clzc_word < ((UWtype) 1 << 2 * __BITS4))                 \
      __clzc_shift = __BITS4;                                           \
    else if (__clzc_word < ((UWtype) 1 << 3 * __BITS4))                 \
      __clzc_shift = 2 * __BITS4;                                       \
    else                                                                \
      __clzc_shift = 3 * __BITS4;                                       \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (__clz_tab[__clzc_word >> __clzc_shift] + __clzc_shift);\
  } while (0)
/* Value the macro above yields for an argument of zero.  */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1771
#ifndef count_trailing_zeros
/* Fallback count_trailing_zeros, expressed through count_leading_zeros
   (which may be an asm version or the C version).  `word & -word' keeps
   only the lowest set bit of X; COUNT is W_TYPE_SIZE - 1 minus the number
   of leading zeros of that single-bit value.  X is evaluated once.  */
#define count_trailing_zeros(count, x)                                  \
  do {                                                                  \
    UWtype __ctz_word = (x);                                            \
    UWtype __ctz_lowbit = __ctz_word & -__ctz_word;                     \
    UWtype __ctz_lead;                                                  \
                                                                        \
    count_leading_zeros (__ctz_lead, __ctz_lowbit);                     \
    (count) = W_TYPE_SIZE - 1 - __ctz_lead;                             \
  } while (0)
#endif
1783
#if !defined (UDIV_NEEDS_NORMALIZATION)
/* Nothing earlier set the flag, so it defaults to 0.  */
#define UDIV_NEEDS_NORMALIZATION 0
#endif