1 /* mpn_gcdext -- Extended Greatest Common Divisor.
3 Copyright (C) 1996 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Library General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 MA 02111-1307, USA. */
/* NOTE(review): debug instrumentation — a histogram indexed by the bit length
   of the largest Lehmer coefficient after each reduction step (see the
   arr[BITS_PER_MP_LIMB - cnt]++ update further down).  The index stays in
   bounds only because coefficients are limited to BITS_PER_MP_LIMB-1 bits
   (sign-bit room), making cnt >= 1.  File-scope mutable state: not
   thread-safe.  */
31 int arr[BITS_PER_MP_LIMB];

/* Three-way sign of A: -1 if negative, +1 if positive, 0 if zero.
   Function-like macro — A is evaluated twice, so callers must pass
   side-effect-free expressions.  */
34 #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
36 /* Idea 1: After we have performed a full division, don't shift operands back,
37 but instead account for the extra factors-of-2 thus introduced.
38 Idea 2: Simple generalization to use divide-and-conquer would give us an
39 algorithm that runs faster than O(n^2).
40 Idea 3: The input numbers need less space as the computation progresses,
41 while the s0 and s1 variables need more space. To save space, we
42 could make them share space, and have the latter variables grow
into the former.  */
45 /* Precondition: U >= V. */
/* mpn_gcdext -- extended GCD via Lehmer's single-limb method: a 2x2 matrix
   (A,B; C,D) of signed single-limb cofactors is accumulated from the top
   limbs of U and V, then applied both to the operands and to the cofactor
   accumulators s0/s1.  Destroys {up,size} and {vp,vsize}; the gcd is copied
   to gp and the first cofactor to the caller's s0p.
   NOTE(review): this listing is an incomplete excerpt (many lines elided,
   including the #if __STDC__ conditionals around the signature variants and
   the function tail), so the annotations below are hedged where context is
   missing.  Precondition per the original source: U >= V.  */
50 mpn_gcdext (mp_ptr gp, mp_ptr s0p,
51 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
/* K&R-style (pre-ANSI) parameter list variant of the same entry point;
   the surrounding preprocessor conditionals were elided from this excerpt.  */
53 mpn_gcdext (gp, s0p, up, size, vp, vsize)
64 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
/* Plain-gcd entry point variant (no cofactors); same elided conditionals.  */
66 mpn_gcd (gp, up, size, vp, vsize)
/* Lehmer cofactor matrix entries: signed, single limb each.  */
76 mp_limb_signed_t A, B, C, D;
/* Debug statistics on coefficient magnitudes; printed near the end.  */
80 mp_limb_signed_t min = 0, max = 0;
/* Remember the caller's cofactor buffer and size: s0p/size are swapped and
   shrunk during the computation, but results must land in the originals.  */
84 mp_ptr orig_s0p = s0p;
85 mp_size_t ssize, orig_size = size;
/* Scratch from the TMP allocator: tp/wp hold matrix-application products
   (one extra limb for carries), s1p holds the second cofactor accumulator.  */
90 tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
91 wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
92 s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
/* Initial full reduction U mod V: mpn_divmod needs a normalized divisor,
   so shift V (and U by the same amount), divide, then shift back and swap
   roles so the invariant U >= V is restored.  */
104 /* Normalize V (and shift up U the same amount). */
105 count_leading_zeros (cnt, vp[vsize - 1]);
109 mpn_lshift (vp, vp, vsize, cnt);
110 cy = mpn_lshift (up, up, size, cnt);
115 mpn_divmod (up + vsize, up, size, vp, vsize);
117 /* This is really what it boils down to in this case... */
124 mpn_rshift (up, up, size, cnt);
125 mpn_rshift (vp, vp, size, cnt);
129 xp = up; up = vp; vp = xp;
135 /* Figure out exact size of V. */
137 MPN_NORMALIZE (vp, vsize);
/* Main loop head (elided in part): uh/vh are single-limb windows on the
   most significant bits of U and the corresponding bits of V.  */
141 /* Make UH be the most significant limb of U, and make VH be
142 corresponding bits from V. */
145 count_leading_zeros (cnt, uh);
148 uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
149 vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
153 /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes
154 room for sign bit. */
/* Inner Lehmer loop: iterate single-limb division steps while the quotient
   is provably exact; stop when a denominator would hit zero or when the
   two quotient bounds (uh+A)/(vh+C) and (uh+B)/(vh+D) disagree.  */
165 mp_limb_signed_t q, T;
166 if (vh + C == 0 || vh + D == 0)
169 q = (uh + A) / (vh + C);
170 if (q != (uh + B) / (vh + D))
/* Debug statistics: track extreme coefficient values seen.  */
185 min = MIN (A, min); min = MIN (B, min);
186 min = MIN (C, min); min = MIN (D, min);
187 max = MAX (A, max); max = MAX (B, max);
188 max = MAX (C, max); max = MAX (D, max);
/* Fallback when the single-limb step made no progress: perform one full
   multi-precision division and update the cofactors with the quotient limbs
   via addmul, exactly as a schoolbook extended-Euclid step would.  */
196 /* This is quite rare. I.e., optimize something else! */
198 /* Normalize V (and shift up U the same amount). */
199 count_leading_zeros (cnt, vp[vsize - 1]);
203 mpn_lshift (vp, vp, vsize, cnt);
204 cy = mpn_lshift (up, up, size, cnt);
209 qh = mpn_divmod (up + vsize, up, size, vp, vsize);
/* tp = s0 + q * s1, built limb-by-limb from the quotient at up+vsize.  */
211 MPN_COPY (tp, s0p, ssize);
212 for (i = 0; i < size - vsize; i++)
215 cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
223 /* XXX since qh == 1, mpn_addmul_1 is overkill */
224 cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
/* Rotate (s0, s1) <- (s1, s0 + q*s1); the copy form and the pointer-swap
   form are alternatives from elided #if branches.  */
229 MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
230 MPN_COPY (s1p, tp, ssize);
234 xp = s0p; s0p = s1p; s1p = xp;
235 xp = s1p; s1p = tp; tp = xp;
242 mpn_rshift (up, up, size, cnt);
243 mpn_rshift (vp, vp, size, cnt);
248 xp = up; up = vp; vp = xp;
250 MPN_NORMALIZE (up, size);
/* Lehmer step succeeded: sanity-check the matrix (rows must have entries of
   opposite sign) and record the coefficient bit length (debug histogram).  */
259 if (SGN(A) == SGN(B)) /* should be different sign */
261 if (SGN(C) == SGN(D)) /* should be different sign */
265 x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
266 count_leading_zeros (cnt, x);
267 arr[BITS_PER_MP_LIMB - cnt]++; }
/* Apply the matrix to (U, V): new values B*V + A*U and D*V + C*U.  One entry
   in each pair is negative, so the negated entry (-A, -B, -C, -D) is passed
   to submul as a nonnegative limb.  A == 0 degenerates to a plain copy, in
   which case B must be 1.  */
271 if (B != 1) abort ();
272 MPN_COPY (tp, vp, size);
278 mpn_mul_1 (tp, vp, size, B);
279 mpn_submul_1 (tp, up, size, -A);
283 mpn_mul_1 (tp, up, size, A);
284 mpn_submul_1 (tp, vp, size, -B);
289 mpn_mul_1 (wp, vp, size, D);
290 mpn_submul_1 (wp, up, size, -C);
294 mpn_mul_1 (wp, up, size, C);
295 mpn_submul_1 (wp, vp, size, -D);
300 xp = tp; tp = up; up = xp;
301 xp = wp; wp = vp; vp = xp;
/* Apply the same matrix to the cofactor pair (s0, s1); here the signs
   cooperate so mul_1 plus addmul_1 accumulate without borrow handling.  */
306 MPN_ZERO (tp, orig_size);
309 if (B != 1) abort ();
310 MPN_COPY (tp, s1p, ssize);
316 cy = mpn_mul_1 (tp, s1p, ssize, B);
317 cy += mpn_addmul_1 (tp, s0p, ssize, -A);
321 cy = mpn_mul_1 (tp, s0p, ssize, A);
322 cy += mpn_addmul_1 (tp, s1p, ssize, -B);
327 MPN_ZERO (wp, orig_size);
330 cy = mpn_mul_1 (wp, s1p, ssize, D);
331 cy += mpn_addmul_1 (wp, s0p, ssize, -C);
335 cy = mpn_mul_1 (wp, s0p, ssize, C);
336 cy += mpn_addmul_1 (wp, s1p, ssize, -D);
343 xp = tp; tp = s0p; s0p = xp;
344 xp = wp; wp = s1p; s1p = xp;
347 #if 0 /* Is it a win to remove multiple zeros here? */
348 MPN_NORMALIZE (up, size);
/* Loop exit: V exhausted (or reduced to one limb).  Debug dump of the
   coefficient statistics, then copy results to the caller's buffers.  */
350 if (up[size - 1] == 0)
357 printf ("min: %ld\n", min);
358 printf ("max: %ld\n", max);
364 MPN_COPY (gp, up, size);
367 MPN_COPY (orig_s0p, s0p, ssize);
/* Single-limb tail: once V fits in one limb (vl), finish the GCD with
   mpn_divmod_1 / mpn_mod_1, folding each quotient into the cofactors with
   the same rotate-(s0,s1) pattern as above.  */
381 t = mpn_divmod_1 (wp, up, size, vl);
382 MPN_COPY (tp, s0p, ssize);
383 for (i = 0; i < size; i++)
385 cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
390 MPN_COPY (s0p, s1p, ssize);
391 MPN_COPY (s1p, tp, ssize);
395 xp = s0p; s0p = s1p; s1p = xp;
396 xp = s1p; s1p = tp; tp = xp;
400 t = mpn_mod_1 (up, size, vl);
412 MPN_COPY (tp, s0p, ssize);
413 cy = mpn_addmul_1 (tp, s1p, ssize, q);
417 MPN_COPY (s0p, s1p, ssize);
418 MPN_COPY (s1p, tp, ssize);
422 xp = s0p; s0p = s1p; s1p = xp;
423 xp = s1p; s1p = tp; tp = xp;
436 MPN_COPY (orig_s0p, s0p, ssize);
/* NOTE(review): function tail (return value, TMP cleanup, closing brace)
   is elided from this excerpt.  */