mpn/generic/gcdext_1.c

   1 /* mpn_gcdext -- Extended Greatest Common Divisor.
   2
   3 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of the GNU MP Library.
   7
   8 The GNU MP Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MP Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  20
  21 #include "gmp.h"
  22 #include "gmp-impl.h"
  23 #include "longlong.h"
  24
  25 #ifndef GCDEXT_1_USE_BINARY
  26 #define GCDEXT_1_USE_BINARY 0
  27 #endif
  28
  29 #ifndef GCDEXT_1_BINARY_METHOD
  30 #define GCDEXT_1_BINARY_METHOD 2
  31 #endif
  32
  33 #ifndef USE_ZEROTAB
  34 #define USE_ZEROTAB 1
  35 #endif
  36
  37 #if GCDEXT_1_USE_BINARY
  38
  39 #if USE_ZEROTAB
  40 static unsigned char zerotab[0x40] = {
  41   6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  42   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  43   5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  44   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  45 };
  46 #endif
  47
  48 mp_limb_t
  49 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
  50               mp_limb_t u, mp_limb_t v)
  51 {
  52   /* Maintain
  53
  54      U = t1 u + t0 v
  55      V = s1 u + s0 v
  56
  57      where U, V are the inputs (without any shared power of two),
  58      and the matris has determinant ± 2^{shift}.
  59   */
  60   mp_limb_t s0 = 1;
  61   mp_limb_t t0 = 0;
  62   mp_limb_t s1 = 0;
  63   mp_limb_t t1 = 1;
  64   mp_limb_t ug;
  65   mp_limb_t vg;
  66   mp_limb_t ugh;
  67   mp_limb_t vgh;
  68   unsigned zero_bits;
  69   unsigned shift;
  70   unsigned i;
  71 #if GCDEXT_1_BINARY_METHOD == 2
  72   mp_limb_t det_sign;
  73 #endif
  74
  75   ASSERT (u > 0);
  76   ASSERT (v > 0);
  77
  78   count_trailing_zeros (zero_bits, u | v);
  79   u >>= zero_bits;
  80   v >>= zero_bits;
  81
  82   if ((u & 1) == 0)
  83     {
  84       count_trailing_zeros (shift, u);
  85       u >>= shift;
  86       t1 <<= shift;
  87     }
  88   else if ((v & 1) == 0)
  89     {
  90       count_trailing_zeros (shift, v);
  91       v >>= shift;
  92       s0 <<= shift;
  93     }
  94   else
  95     shift = 0;
  96
  97 #if GCDEXT_1_BINARY_METHOD == 1
  98   while (u != v)
  99     {
 100       unsigned count;
 101       if (u > v)
 102         {
 103           u -= v;
 104 #if USE_ZEROTAB
 105           count = zerotab [u & 0x3f];
 106           u >>= count;
 107           if (UNLIKELY (count == 6))
 108             {
 109               unsigned c;
 110               do
 111                 {
 112                   c = zerotab[u & 0x3f];
 113                   u >>= c;
 114                   count += c;
 115                 }
 116               while (c == 6);
 117             }
 118 #else
 119           count_trailing_zeros (count, u);
 120           u >>= count;
 121 #endif
 122           t0 += t1; t1 <<= count;
 123           s0 += s1; s1 <<= count;
 124         }
 125       else
 126         {
 127           v -= u;
 128 #if USE_ZEROTAB
 129           count = zerotab [v & 0x3f];
 130           v >>= count;
 131           if (UNLIKELY (count == 6))
 132             {
 133               unsigned c;
 134               do
 135                 {
 136                   c = zerotab[v & 0x3f];
 137                   v >>= c;
 138                   count += c;
 139                 }
 140               while (c == 6);
 141             }
 142 #else
 143           count_trailing_zeros (count, v);
 144           v >>= count;
 145 #endif
 146           t1 += t0; t0 <<= count;
 147           s1 += s0; s0 <<= count;
 148         }
 149       shift += count;
 150     }
 151 #else
 152 # if GCDEXT_1_BINARY_METHOD == 2
 153   u >>= 1;
 154   v >>= 1;
 155
 156   det_sign = 0;
 157
 158   while (u != v)
 159     {
 160       unsigned count;
 161       mp_limb_t d =  u - v;
 162       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
 163       mp_limb_t sx;
 164       mp_limb_t tx;
 165
 166       /* When v <= u (vgtu == 0), the updates are:
 167
 168            (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
 169            (t1, t0) <-- (t1 << count, t0 + t1)
 170
 171          and when v > 0, the updates are
 172
 173            (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
 174            (t1, t0) <-- (t0 << count, t0 + t1)
 175
 176          and similarly for s1, s0
 177       */
 178
 179       /* v <-- min (u, v) */
 180       v += (vgtu & d);
 181
 182       /* u <-- |u - v| */
 183       u = (d ^ vgtu) - vgtu;
 184
 185       /* Number of trailing zeros is the same no matter if we look at
 186        * d or u, but using d gives more parallelism. */
 187 #if USE_ZEROTAB
 188       count = zerotab[d & 0x3f];
 189       if (UNLIKELY (count == 6))
 190         {
 191           unsigned c = 6;
 192           do
 193             {
 194               d >>= c;
 195               c = zerotab[d & 0x3f];
 196               count += c;
 197             }
 198           while (c == 6);
 199         }
 200 #else
 201       count_trailing_zeros (count, d);
 202 #endif
 203       det_sign ^= vgtu;
 204
 205       tx = vgtu & (t0 - t1);
 206       sx = vgtu & (s0 - s1);
 207       t0 += t1;
 208       s0 += s1;
 209       t1 += tx;
 210       s1 += sx;
 211
 212       count++;
 213       u >>= count;
 214       t1 <<= count;
 215       s1 <<= count;
 216       shift += count;
 217     }
 218   u = (u << 1) + 1;
 219 # else /* GCDEXT_1_BINARY_METHOD == 2 */
 220 #  error Unknown GCDEXT_1_BINARY_METHOD
 221 # endif
 222 #endif
 223
 224   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
 225   ug = t0 + t1;
 226   vg = s0 + s1;
 227
 228   ugh = ug/2 + (ug & 1);
 229   vgh = vg/2 + (vg & 1);
 230
 231   /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
 232      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
 233   for (i = 0; i < shift; i++)
 234     {
 235       mp_limb_t mask = - ( (s0 | t0) & 1);
 236
 237       s0 /= 2;
 238       t0 /= 2;
 239       s0 += mask & vgh;
 240       t0 += mask & ugh;
 241     }
 242   /* FIXME: Try simplifying this condition. */
 243   if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
 244     {
 245       s0 -= vg;
 246       t0 -= ug;
 247     }
 248 #if GCDEXT_1_BINARY_METHOD == 2
 249   /* Conditional negation. */
 250   s0 = (s0 ^ det_sign) - det_sign;
 251   t0 = (t0 ^ det_sign) - det_sign;
 252 #endif
 253   *sp = s0;
 254   *tp = -t0;
 255
 256   return u << zero_bits;
 257 }
 258
 259 #else /* !GCDEXT_1_USE_BINARY */
 260
 261
 262 /* FIXME: Takes two single-word limbs. It could be extended to a
 263  * function that accepts a bignum for the first input, and only
 264  * returns the first co-factor. */
 265
 266 mp_limb_t
 267 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
 268               mp_limb_t a, mp_limb_t b)
 269 {
 270   /* Maintain
 271
 272      a =  u0 A + v0 B
 273      b =  u1 A + v1 B
 274
 275      where A, B are the original inputs.
 276   */
 277   mp_limb_signed_t u0 = 1;
 278   mp_limb_signed_t v0 = 0;
 279   mp_limb_signed_t u1 = 0;
 280   mp_limb_signed_t v1 = 1;
 281
 282   ASSERT (a > 0);
 283   ASSERT (b > 0);
 284
 285   if (a < b)
 286     goto divide_by_b;
 287
 288   for (;;)
 289     {
 290       mp_limb_t q;
 291
 292       q = a / b;
 293       a -= q * b;
 294
 295       if (a == 0)
 296         {
 297           *up = u1;
 298           *vp = v1;
 299           return b;
 300         }
 301       u0 -= q * u1;
 302       v0 -= q * v1;
 303
 304     divide_by_b:
 305       q = b / a;
 306       b -= q * a;
 307
 308       if (b == 0)
 309         {
 310           *up = u0;
 311           *vp = v0;
 312           return a;
 313         }
 314       u1 -= q * u0;
 315       v1 -= q * v0;
 316     }
 317 }
 318 #endif /* !GCDEXT_1_USE_BINARY */