contrib/gmp/mpn/generic/hgcd.c

   1 /* hgcd.c.
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   6
   7 Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU Lesser General Public License as published by
  13 the Free Software Foundation; either version 3 of the License, or (at your
  14 option) any later version.
  15
  16 The GNU MP Library is distributed in the hope that it will be useful, but
  17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  19 License for more details.
  20
  21 You should have received a copy of the GNU Lesser General Public License
  22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  23
  24 #include "gmp.h"
  25 #include "gmp-impl.h"
  26 #include "longlong.h"
  27
  28 /* For input of size n, matrix elements are of size at most ceil(n/2)
  29    - 1, but we need two limbs extra. */
  30 void
  31 mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
  32 {
  33   mp_size_t s = (n+1)/2 + 1;
  34   M->alloc = s;
  35   M->n = 1;
  36   MPN_ZERO (p, 4 * s);
  37   M->p[0][0] = p;
  38   M->p[0][1] = p + s;
  39   M->p[1][0] = p + 2 * s;
  40   M->p[1][1] = p + 3 * s;
  41
  42   M->p[0][0][0] = M->p[1][1][0] = 1;
  43 }
  44
  45 /* Updated column COL, adding in column (1-COL). */
  46 static void
  47 hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
  48 {
  49   mp_limb_t c0, c1;
  50   ASSERT (col < 2);
  51
  52   c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
  53   c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
  54
  55   M->p[0][col][M->n] = c0;
  56   M->p[1][col][M->n] = c1;
  57
  58   M->n += (c0 | c1) != 0;
  59   ASSERT (M->n < M->alloc);
  60 }
  61
  62 /* Updated column COL, adding in column Q * (1-COL). Temporary
  63  * storage: qn + n <= M->alloc, where n is the size of the largest
  64  * element in column 1 - COL. */
  65 static void
  66 hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
  67                       unsigned col, mp_ptr tp)
  68 {
  69   ASSERT (col < 2);
  70
  71   if (qn == 1)
  72     {
  73       mp_limb_t q = qp[0];
  74       mp_limb_t c0, c1;
  75
  76       c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
  77       c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
  78
  79       M->p[0][col][M->n] = c0;
  80       M->p[1][col][M->n] = c1;
  81
  82       M->n += (c0 | c1) != 0;
  83     }
  84   else
  85     {
  86       unsigned row;
  87
  88       /* Carries for the unlikely case that we get both high words
  89          from the multiplication and carries from the addition. */
  90       mp_limb_t c[2];
  91       mp_size_t n;
  92
  93       /* The matrix will not necessarily grow in size by qn, so we
  94          need normalization in order not to overflow M. */
  95
  96       for (n = M->n; n + qn > M->n; n--)
  97         {
  98           ASSERT (n > 0);
  99           if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
 100             break;
 101         }
 102
 103       ASSERT (qn + n <= M->alloc);
 104
 105       for (row = 0; row < 2; row++)
 106         {
 107           if (qn <= n)
 108             mpn_mul (tp, M->p[row][1-col], n, qp, qn);
 109           else
 110             mpn_mul (tp, qp, qn, M->p[row][1-col], n);
 111
 112           ASSERT (n + qn >= M->n);
 113           c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
 114         }
 115       if (c[0] | c[1])
 116         {
 117           M->n = n + qn + 1;
 118           M->p[0][col][n-1] = c[0];
 119           M->p[1][col][n-1] = c[1];
 120         }
 121       else
 122         {
 123           n += qn;
 124           n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
 125           if (n > M->n)
 126             M->n = n;
 127         }
 128     }
 129
 130   ASSERT (M->n < M->alloc);
 131 }
 132
 133 /* Multiply M by M1 from the right. Since the M1 elements fit in
 134    GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
 135    temporary space M->n */
 136 static void
 137 hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
 138                    mp_ptr tp)
 139 {
 140   mp_size_t n0, n1;
 141
 142   /* Could avoid copy by some swapping of pointers. */
 143   MPN_COPY (tp, M->p[0][0], M->n);
 144   n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
 145   MPN_COPY (tp, M->p[1][0], M->n);
 146   n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
 147
 148   /* Depends on zero initialization */
 149   M->n = MAX(n0, n1);
 150   ASSERT (M->n < M->alloc);
 151 }
 152
 153 /* Perform a few steps, using some of mpn_hgcd2, subtraction and
 154    division. Reduces the size by almost one limb or more, but never
 155    below the given size s. Return new size for a and b, or 0 if no
 156    more steps are possible.
 157
 158    If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
 159    limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
 160    fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
 161    hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
 162    resulting size of $.
 163
 164    If N is the input size to the calling hgcd, then s = floor(N/2) +
 165    1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
 166    < N, so N is sufficient.
 167 */
 168
 169 static mp_size_t
 170 hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
 171            struct hgcd_matrix *M, mp_ptr tp)
 172 {
 173   struct hgcd_matrix1 M1;
 174   mp_limb_t mask;
 175   mp_limb_t ah, al, bh, bl;
 176   mp_size_t an, bn, qn;
 177   int col;
 178
 179   ASSERT (n > s);
 180
 181   mask = ap[n-1] | bp[n-1];
 182   ASSERT (mask > 0);
 183
 184   if (n == s + 1)
 185     {
 186       if (mask < 4)
 187         goto subtract;
 188
 189       ah = ap[n-1]; al = ap[n-2];
 190       bh = bp[n-1]; bl = bp[n-2];
 191     }
 192   else if (mask & GMP_NUMB_HIGHBIT)
 193     {
 194       ah = ap[n-1]; al = ap[n-2];
 195       bh = bp[n-1]; bl = bp[n-2];
 196     }
 197   else
 198     {
 199       int shift;
 200
 201       count_leading_zeros (shift, mask);
 202       ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
 203       al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
 204       bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
 205       bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
 206     }
 207
 208   /* Try an mpn_hgcd2 step */
 209   if (mpn_hgcd2 (ah, al, bh, bl, &M1))
 210     {
 211       /* Multiply M <- M * M1 */
 212       hgcd_matrix_mul_1 (M, &M1, tp);
 213
 214       /* Can't swap inputs, so we need to copy. */
 215       MPN_COPY (tp, ap, n);
 216       /* Multiply M1^{-1} (a;b) */
 217       return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
 218     }
 219
 220  subtract:
 221   /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
 222      bh was too small, or ah, bh were (almost) equal. Perform one
 223      subtraction step (for possible cancellation of high limbs),
 224      followed by one division. */
 225
 226   /* Since we must ensure that #(a-b) > s, we handle cancellation of
 227      high limbs explicitly up front. (FIXME: Or is it better to just
 228      subtract, normalize, and use an addition to undo if it turns out
 229      the the difference is too small?) */
 230   for (an = n; an > s; an--)
 231     if (ap[an-1] != bp[an-1])
 232       break;
 233
 234   if (an == s)
 235     return 0;
 236
 237   /* Maintain a > b. When needed, swap a and b, and let col keep track
 238      of how to update M. */
 239   if (ap[an-1] > bp[an-1])
 240     {
 241       /* a is largest. In the subtraction step, we need to update
 242          column 1 of M */
 243       col = 1;
 244     }
 245   else
 246     {
 247       MP_PTR_SWAP (ap, bp);
 248       col = 0;
 249     }
 250
 251   bn = n;
 252   MPN_NORMALIZE (bp, bn);
 253   if (bn <= s)
 254     return 0;
 255
 256   /* We have #a, #b > s. When is it possible that #(a-b) < s? For
 257      cancellation to happen, the numbers must be of the form
 258
 259        a = x + 1, 0,            ..., 0,            al
 260        b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
 261
 262      where al, bl denotes the least significant k limbs. If al < bl,
 263      then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
 264      then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
 265
 266   if (ap[an-1] == bp[an-1] + 1)
 267     {
 268       mp_size_t k;
 269       int c;
 270       for (k = an-1; k > s; k--)
 271         if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
 272           break;
 273
 274       MPN_CMP (c, ap, bp, k);
 275       if (c < 0)
 276         {
 277           mp_limb_t cy;
 278
 279           /* The limbs from k and up are cancelled. */
 280           if (k == s)
 281             return 0;
 282           cy = mpn_sub_n (ap, ap, bp, k);
 283           ASSERT (cy == 1);
 284           an = k;
 285         }
 286       else
 287         {
 288           ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
 289           ap[k] = 1;
 290           an = k + 1;
 291         }
 292     }
 293   else
 294     ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
 295
 296   ASSERT (an > s);
 297   ASSERT (ap[an-1] > 0);
 298   ASSERT (bn > s);
 299   ASSERT (bp[bn-1] > 0);
 300
 301   hgcd_matrix_update_1 (M, col);
 302
 303   if (an < bn)
 304     {
 305       MPN_PTR_SWAP (ap, an, bp, bn);
 306       col ^= 1;
 307     }
 308   else if (an == bn)
 309     {
 310       int c;
 311       MPN_CMP (c, ap, bp, an);
 312       if (c < 0)
 313         {
 314           MP_PTR_SWAP (ap, bp);
 315           col ^= 1;
 316         }
 317     }
 318
 319   /* Divide a / b. */
 320   qn = an + 1 - bn;
 321
 322   /* FIXME: We could use an approximate division, that may return a
 323      too small quotient, and only guarantee that the size of r is
 324      almost the size of b. FIXME: Let ap and remainder overlap. */
 325   mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
 326   qn -= (tp[qn -1] == 0);
 327
 328   /* Normalize remainder */
 329   an = bn;
 330   for ( ; an > s; an--)
 331     if (ap[an-1] > 0)
 332       break;
 333
 334   if (an <= s)
 335     {
 336       /* Quotient is too large */
 337       mp_limb_t cy;
 338
 339       cy = mpn_add (ap, bp, bn, ap, an);
 340
 341       if (cy > 0)
 342         {
 343           ASSERT (bn < n);
 344           ap[bn] = cy;
 345           bp[bn] = 0;
 346           bn++;
 347         }
 348
 349       MPN_DECR_U (tp, qn, 1);
 350       qn -= (tp[qn-1] == 0);
 351     }
 352
 353   if (qn > 0)
 354     hgcd_matrix_update_q (M, tp, qn, col, tp + qn);
 355
 356   return bn;
 357 }
 358
 359 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
 360    with elements of size at most (n+1)/2 - 1. Returns new size of a,
 361    b, or zero if no reduction is possible. */
 362 mp_size_t
 363 mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
 364                  struct hgcd_matrix *M, mp_ptr tp)
 365 {
 366   mp_size_t s = n/2 + 1;
 367   mp_size_t nn;
 368
 369   ASSERT (n > s);
 370   ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
 371
 372   nn = hgcd_step (n, ap, bp, s, M, tp);
 373   if (!nn)
 374     return 0;
 375
 376   for (;;)
 377     {
 378       n = nn;
 379       ASSERT (n > s);
 380       nn = hgcd_step (n, ap, bp, s, M, tp);
 381       if (!nn )
 382         return n;
 383     }
 384 }
 385
 386 /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
 387    of temporary storage (see mpn_matrix22_mul_itch). */
 388 void
 389 mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
 390                      mp_ptr tp)
 391 {
 392   mp_size_t n;
 393
 394   /* About the new size of M:s elements. Since M1's diagonal elements
 395      are > 0, no element can decrease. The new elements are of size
 396      M->n + M1->n, one limb more or less. The computation of the
 397      matrix product produces elements of size M->n + M1->n + 1. But
 398      the true size, after normalization, may be three limbs smaller.
 399
 400      The reason that the product has normalized size >= M->n + M1->n -
 401      2 is subtle. It depends on the fact that M and M1 can be factored
 402      as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
 403      M ending with a large power and M1 starting with a large power of
 404      the same matrix. */
 405
 406   /* FIXME: Strassen multiplication gives only a small speedup. In FFT
 407      multiplication range, this function could be sped up quite a lot
 408      using invariance. */
 409   ASSERT (M->n + M1->n < M->alloc);
 410
 411   ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
 412            | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
 413
 414   ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
 415            | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
 416
 417   mpn_matrix22_mul (M->p[0][0], M->p[0][1],
 418                     M->p[1][0], M->p[1][1], M->n,
 419                     M1->p[0][0], M1->p[0][1],
 420                     M1->p[1][0], M1->p[1][1], M1->n, tp);
 421
 422   /* Index of last potentially non-zero limb, size is one greater. */
 423   n = M->n + M1->n;
 424
 425   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
 426   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
 427   n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
 428
 429   ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
 430
 431   M->n = n + 1;
 432 }
 433
 434 /* Multiplies the least significant p limbs of (a;b) by M^-1.
 435    Temporary space needed: 2 * (p + M->n)*/
 436 mp_size_t
 437 mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
 438                         mp_size_t n, mp_ptr ap, mp_ptr bp,
 439                         mp_size_t p, mp_ptr tp)
 440 {
 441   /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
 442      = (r11 a - r01 b; - r10 a + r00 b */
 443
 444   mp_ptr t0 = tp;
 445   mp_ptr t1 = tp + p + M->n;
 446   mp_limb_t ah, bh;
 447   mp_limb_t cy;
 448
 449   ASSERT (p + M->n  < n);
 450
 451   /* First compute the two values depending on a, before overwriting a */
 452
 453   if (M->n >= p)
 454     {
 455       mpn_mul (t0, M->p[1][1], M->n, ap, p);
 456       mpn_mul (t1, M->p[1][0], M->n, ap, p);
 457     }
 458   else
 459     {
 460       mpn_mul (t0, ap, p, M->p[1][1], M->n);
 461       mpn_mul (t1, ap, p, M->p[1][0], M->n);
 462     }
 463
 464   /* Update a */
 465   MPN_COPY (ap, t0, p);
 466   ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
 467
 468   if (M->n >= p)
 469     mpn_mul (t0, M->p[0][1], M->n, bp, p);
 470   else
 471     mpn_mul (t0, bp, p, M->p[0][1], M->n);
 472
 473   cy = mpn_sub (ap, ap, n, t0, p + M->n);
 474   ASSERT (cy <= ah);
 475   ah -= cy;
 476
 477   /* Update b */
 478   if (M->n >= p)
 479     mpn_mul (t0, M->p[0][0], M->n, bp, p);
 480   else
 481     mpn_mul (t0, bp, p, M->p[0][0], M->n);
 482
 483   MPN_COPY (bp, t0, p);
 484   bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
 485   cy = mpn_sub (bp, bp, n, t1, p + M->n);
 486   ASSERT (cy <= bh);
 487   bh -= cy;
 488
 489   if (ah > 0 || bh > 0)
 490     {
 491       ap[n] = ah;
 492       bp[n] = bh;
 493       n++;
 494     }
 495   else
 496     {
 497       /* The subtraction can reduce the size by at most one limb. */
 498       if (ap[n-1] == 0 && bp[n-1] == 0)
 499         n--;
 500     }
 501   ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
 502   return n;
 503 }
 504
 505 /* Size analysis for hgcd:
 506
 507    For the recursive calls, we have n1 <= ceil(n / 2). Then the
 508    storage need is determined by the storage for the recursive call
 509    computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
 510    (after this, the storage needed for M1 can be recycled).
 511
 512    Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
 513    = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
 514    and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
 515    4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
 516
 517    For the recursive call, we need S(n1) = S(ceil(n/2)).
 518
 519    S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
 520         <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
 521         <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
 522         <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
 523 */
 524
 525 mp_size_t
 526 mpn_hgcd_itch (mp_size_t n)
 527 {
 528   unsigned k;
 529   int count;
 530   mp_size_t nscaled;
 531
 532   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
 533     return MPN_HGCD_LEHMER_ITCH (n);
 534
 535   /* Get the recursion depth. */
 536   nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
 537   count_leading_zeros (count, nscaled);
 538   k = GMP_LIMB_BITS - count;
 539
 540   return 20 * ((n+3) / 4) + 22 * k
 541     + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
 542 }
 543
 544 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
 545    with elements of size at most (n+1)/2 - 1. Returns new size of a,
 546    b, or zero if no reduction is possible. */
 547
 548 mp_size_t
 549 mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
 550           struct hgcd_matrix *M, mp_ptr tp)
 551 {
 552   mp_size_t s = n/2 + 1;
 553   mp_size_t n2 = (3*n)/4 + 1;
 554
 555   mp_size_t p, nn;
 556   int success = 0;
 557
 558   if (n <= s)
 559     /* Happens when n <= 2, a fairly uninteresting case but exercised
 560        by the random inputs of the testsuite. */
 561     return 0;
 562
 563   ASSERT ((ap[n-1] | bp[n-1]) > 0);
 564
 565   ASSERT ((n+1)/2 - 1 < M->alloc);
 566
 567   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
 568     return mpn_hgcd_lehmer (ap, bp, n, M, tp);
 569
 570   p = n/2;
 571   nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
 572   if (nn > 0)
 573     {
 574       /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
 575          = 2 (n - 1) */
 576       n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
 577       success = 1;
 578     }
 579   while (n > n2)
 580     {
 581       /* Needs n + 1 storage */
 582       nn = hgcd_step (n, ap, bp, s, M, tp);
 583       if (!nn)
 584         return success ? n : 0;
 585       n = nn;
 586       success = 1;
 587     }
 588
 589   if (n > s + 2)
 590     {
 591       struct hgcd_matrix M1;
 592       mp_size_t scratch;
 593
 594       p = 2*s - n + 1;
 595       scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
 596
 597       mpn_hgcd_matrix_init(&M1, n - p, tp);
 598       nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
 599       if (nn > 0)
 600         {
 601           /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
 602           ASSERT (M->n + 2 >= M1.n);
 603
 604           /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
 605              then either q or q + 1 is a correct quotient, and M1 will
 606              start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
 607              rules out the case that the size of M * M1 is much
 608              smaller than the expected M->n + M1->n. */
 609
 610           ASSERT (M->n + M1.n < M->alloc);
 611
 612           /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
 613              = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
 614           n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
 615
 616           /* We need a bound for of M->n + M1.n. Let n be the original
 617              input size. Then
 618
 619                ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
 620
 621              and it follows that
 622
 623                M.n + M1.n <= ceil(n/2) + 1
 624
 625              Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
 626              amount of needed scratch space. */
 627           mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
 628           success = 1;
 629         }
 630     }
 631
 632   /* This really is the base case */
 633   for (;;)
 634     {
 635       /* Needs s+3 < n */
 636       nn = hgcd_step (n, ap, bp, s, M, tp);
 637       if (!nn)
 638         return success ? n : 0;
 639
 640       n = nn;
 641       success = 1;
 642     }
 643 }