1 /* UltraSPARC 64 support macros.
3 THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4 CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5 FUTURE GNU MP RELEASES.
7 Copyright 2003 Free Software Foundation, Inc.
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* LOW32(x) is the least significant 32 bits of x.  HIGH32(x) is x shifted
   right by 32 bits, which is the upper half when x is a 64-bit limb.  Each
   macro evaluates its argument exactly once.  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)
29 /* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
30 Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
31 effect of swapping the two halves in this case. */
32 #if HAVE_LIMB_BIG_ENDIAN
33 #define HALF_ENDIAN_ADJ(i) (1 - (((i) & 1) << 1)) /* +1 even, -1 odd */
35 #if HAVE_LIMB_LITTLE_ENDIAN
36 #define HALF_ENDIAN_ADJ(i) 0 /* no adjust */
38 #ifndef HALF_ENDIAN_ADJ
39 Error, error, unknown limb endianness;
/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits are equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we need from those
   product parts is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.

   h is an lvalue that is only written.  q, d, dh, dl and l are limb-valued
   expressions and may be evaluated more than once.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
    ASSERT ((dh) == HIGH32 (d));                \
    ASSERT ((dl) == LOW32 (d));                 \
    ASSERT ((q) * (d) == (l));                  \
                                                \
    ql = LOW32 (q);                             \
    qh = HIGH32 (q);                            \
                                                \
    pm1 = ql * (dh);                            \
    pm2 = qh * (dl);                            \
    ph  = qh * (dh);                            \
                                                \
    /* at most 33 bits, bit 32 is the carry */  \
    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
                                                \
    /* pm_l << 32 has LOW(pm_l) in its high */  \
    /* half and zeros below, so it exceeds l */ \
    /* exactly when LOW(pm_l) > HIGH(l) */      \
    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
      + HIGH32 (pm_l) + ((pm_l << 32) > (l));   \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32-bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.

   h is an lvalue that is only written.  q, d and l are limb-valued
   expressions and may be evaluated more than once.  */

#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t pm;                               \
    ASSERT ((q) * (d) == (l));                  \
    ASSERT (HIGH32 (d) == 0);                   \
                                                \
    pm = HIGH32 (q) * (d);                      \
    /* pm << 32 puts LOW(pm) in the high */     \
    /* half, so it exceeds l exactly when */    \
    /* LOW(pm) > HIGH(l), i.e. on a carry */    \
    (h) = HIGH32 (pm) + ((pm << 32) > (l));     \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
/* Check that h is the high limb of x*y.

   In the checking variant the full product is recomputed with umul_ppmm
   and its high limb compared against h.  In the other variant the macro
   expands to an empty statement and evaluates none of its arguments.

   NOTE(review): WANT_ASSERT is assumed to be the build's assertion switch
   selecting between the two variants -- confirm against the project's
   configuration headers.  */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT ((h) == want_h);             \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do { } while (0)
#endif
/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63, so x must be nonzero.
   This is the 32-bit generic C count_leading_zeros from longlong.h.

   count is an lvalue that is only written.  x is evaluated exactly once.

   __a selects the byte holding the highest set bit (shift of 1, 9, 17 or
   25) and __clz_tab finishes the job within that byte.  This assumes
   __clz_tab[i] is one more than the bit length of i for i in 0..128 --
   confirm against the table's definition.  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT (__xr != 0);                                                       \
    ASSERT (__xr <= CNST_LIMB(0xFFFFFFFF));                                   \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)
/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
   32 bits and is normalized (high bit set).  Here b = 2^32.

   inv is an lvalue that is only written.  d is a limb-valued expression
   and is evaluated more than once.

   Negating d as a limb and shifting left by 32 discards the upper bits and
   leaves exactly b*(b-d).  Since d >= b/2 we have b-d <= d, so the
   quotient is below b and the narrowing cast to unsigned loses nothing
   (given an unsigned of at least 32 bits).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  _n;                              \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
  } while (0)
160 /* Divide nh:nl by d, setting q to the quotient and r to the remainder.
161 q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
162 dinv_limb is similarly a 32-bit inverse but in an mp_limb_t. */
164 #define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb) \
166 unsigned _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q; \
168 ASSERT (d_limb <= 0xFFFFFFFF); \
169 ASSERT (dinv_limb <= 0xFFFFFFFF); \
170 ASSERT (d_limb & 0x80000000); \
171 ASSERT (nh < d_limb); \
174 _n1 = (int) _n10 >> 31; \
175 _nadj = _n10 + (_n1 & d_limb); \
176 _x = dinv_limb * (_n2 - _n1) + _nadj; \
177 _q11n = ~(_n2 + HIGH32 (_x)); /* -q1-1 */ \
178 _n = ((mp_limb_t) _n2 << 32) + _n10; \
179 _x = _n + d_limb * _q11n; /* n-q1*d-d */ \
180 _xh = HIGH32 (_x) - d_limb; /* high(n-q1*d-d) */ \
181 ASSERT (_xh == 0 || _xh == ~0); \
182 _r = _x + (d_limb & _xh); /* addback */ \
183 _q = _xh - _q11n; /* q1+1-addback */ \
184 ASSERT (_r < d_limb); \
185 ASSERT (d_limb * _q + _r == _n); \