contrib/mpfr/src/mulders.c

   1 /* Mulders' MulHigh function (short product)
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 /* References:
  24    [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
  25        Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
  26        July 25-27, 2011, pages 7-14.
  27 */
  28
  29 #define MPFR_NEED_LONGLONG_H
  30 #include "mpfr-impl.h"
  31
  32 #ifndef MUL_FFT_THRESHOLD
  33 #define MUL_FFT_THRESHOLD 8448
  34 #endif
  35
  36 /* Don't use MPFR_MULHIGH_SIZE since it is handled by tuneup */
  37 #ifdef MPFR_MULHIGH_TAB_SIZE
  38 static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
  39 #else
  40 static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
  41 #define MPFR_MULHIGH_TAB_SIZE \
  42   ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])))
  43 #endif
  44
  45 /* Put in  rp[n..2n-1] an approximation of the n high limbs
  46    of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
  47    approximation is always less or equal to the truncated full product).
  48    Assume 2n limbs are allocated at rp.
  49
  50    Implements Algorithm ShortMulNaive from [1].
  51 */
  52 static void
  53 mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
  54                          mpfr_limb_srcptr vp, mp_size_t n)
  55 {
  56   mp_size_t i;
  57
  58   rp += n - 1;
  59   umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
  60                                                which is less than B^n */
  61   for (i = 1 ; i < n ; i++)
  62     /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
  63     rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
  64   /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
  65 }
  66
  67 /* Put in  rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
  68    Assume 2n limbs are allocated at rp. */
  69 static void
  70 mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
  71                         mpfr_limb_srcptr vp, mp_size_t n)
  72 {
  73   mp_size_t i;
  74
  75   rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
  76   for (i = 1 ; i < n ; i++)
  77     mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
  78 }
  79
  80 /* Put in  rp[n..2n-1] an approximation of the n high limbs
  81    of {np, n} * {mp, n}. The error is less than n ulps of rp[n] (and the
  82    approximation is always less or equal to the truncated full product).
  83
  84    Implements Algorithm ShortMul from [1].
  85 */
  86 void
  87 mpfr_mulhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
  88                 mp_size_t n)
  89 {
  90   mp_size_t k;
  91
  92   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
  93   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
  94   /* Algorithm ShortMul from [1] requires k >= (n+3)/2, which translates
  95      into k >= (n+4)/2 in the C language. */
  96   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  97   if (k < 0)
  98     mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
  99   else if (k == 0)
 100     mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
 101   else if (n > MUL_FFT_THRESHOLD)
 102     mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
 103   else
 104     {
 105       mp_size_t l = n - k;
 106       mp_limb_t cy;
 107
 108       mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
 109       mpfr_mulhigh_n (rp, np + k, mp, l);        /* fills rp[l-1..2l-1] */
 110       cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 111       mpfr_mulhigh_n (rp, np, mp + k, l);        /* fills rp[l-1..2l-1] */
 112       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 113       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
 114     }
 115 }
 116
 117 /* Put in  rp[0..n] the n+1 low limbs of {np, n} * {mp, n}.
 118    Assume 2n limbs are allocated at rp. */
 119 void
 120 mpfr_mullow_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
 121                mp_size_t n)
 122 {
 123   mp_size_t k;
 124
 125   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
 126   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
 127   MPFR_ASSERTD (k == -1 || k == 0 || (2 * k >= n && k < n));
 128   if (k < 0)
 129     mpn_mul_basecase (rp, np, n, mp, n);
 130   else if (k == 0)
 131     mpfr_mullow_n_basecase (rp, np, mp, n);
 132   else if (n > MUL_FFT_THRESHOLD)
 133     mpn_mul_n (rp, np, mp, n);
 134   else
 135     {
 136       mp_size_t l = n - k;
 137
 138       mpn_mul_n (rp, np, mp, k);                      /* fills rp[0..2k] */
 139       mpfr_mullow_n (rp + n, np + k, mp, l);          /* fills rp[n..n+2l] */
 140       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
 141       mpfr_mullow_n (rp + n, np, mp + k, l);          /* fills rp[n..n+2l] */
 142       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
 143     }
 144 }
 145
 146 #ifdef MPFR_SQRHIGH_TAB_SIZE
 147 static short sqrhigh_ktab[MPFR_SQRHIGH_TAB_SIZE];
 148 #else
 149 static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
 150 #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
 151 #endif
 152
 153 /* Put in  rp[n..2n-1] an approximation of the n high limbs
 154    of {np, n}^2. The error is less than n ulps of rp[n]. */
 155 void
 156 mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
 157 {
 158   mp_size_t k;
 159
 160   MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
 161   k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
 162     : (n+4)/2; /* ensures that k >= (n+3)/2 */
 163   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
 164   if (k < 0)
 165     /* we can't use mpn_sqr_basecase here, since it requires
 166        n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
 167        is not exported by GMP */
 168     mpn_sqr_n (rp, np, n);
 169   else if (k == 0)
 170     mpfr_mulhigh_n_basecase (rp, np, np, n);
 171   else
 172     {
 173       mp_size_t l = n - k;
 174       mp_limb_t cy;
 175
 176       mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
 177       mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
 178       /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
 179       cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
 180       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 181       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
 182     }
 183 }
 184
 185 #ifdef MPFR_DIVHIGH_TAB_SIZE
 186 static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
 187 #else
 188 static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
 189 #define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
 190 #endif
 191
 192 #ifndef __GMPFR_GMP_H__
 193 #define mpfr_pi1_t gmp_pi1_t /* with a GMP build */
 194 #endif
 195
 196 #if !(defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q))
 197 /* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
 198    with the most significant limb of the quotient as return value (0 or 1).
 199    Assumes the most significant bit of D is set. Clobbers N.
 200
 201    The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
 202 */
 203 static mp_limb_t
 204 mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
 205                          mpfr_limb_srcptr dp, mp_size_t n)
 206 {
 207   mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
 208   mpfr_pi1_t dinv2;
 209
 210   np += n;
 211
 212   if ((qh = (mpn_cmp (np, dp, n) >= 0)))
 213     mpn_sub_n (np, np, dp, n);
 214
 215   /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */
 216
 217   d1 = dp[n - 1];
 218
 219   if (n == 1)
 220     {
 221       invert_limb (dinv, d1);
 222       umul_ppmm (q1, q0, np[0], dinv);
 223       qp[0] = np[0] + q1;
 224       return qh;
 225     }
 226
 227   /* now n >= 2 */
 228   d0 = dp[n - 2];
 229   invert_pi1 (dinv2, d1, d0);
 230   /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
 231   while (n > 1)
 232     {
 233       /* Invariant: it remains to reduce n limbs from N (in addition to the
 234          initial low n limbs).
 235          Since n >= 2 here, necessarily we had n >= 2 initially, which means
 236          that in addition to the limb np[n-1] to reduce, we have at least 2
 237          extra limbs, thus accessing np[n-3] is valid. */
 238
 239       /* warning: we can have np[n-1]=d1 and np[n-2]=d0, but since {np,n} < D,
 240          the largest possible partial quotient is B-1 */
 241       if (MPFR_UNLIKELY(np[n - 1] == d1 && np[n - 2] == d0))
 242         q2 = ~ (mp_limb_t) 0;
 243       else
 244         udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
 245                       d1, d0, dinv2.inv32);
 246       /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
 247          we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
 248          thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
 249          and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
 250          thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
 251          which proves that at most one correction is needed */
 252       q0 = mpn_submul_1 (np - 1, dp, n, q2);
 253       if (MPFR_UNLIKELY(q0 > np[n - 1]))
 254         {
 255           mpn_add_n (np - 1, np - 1, dp, n);
 256           q2 --;
 257         }
 258       qp[--n] = q2;
 259       dp ++;
 260     }
 261
 262   /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
 263      q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
 264         <= floor((np[0]*B+np[1])/d1)
 265      thus q1 is not larger than the true quotient.
 266      q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
 267      For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
 268      thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
 269      (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
 270      d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
 271      thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.
 272
 273      For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
 274      np[0]*B/d1 - 2.
 275
 276      In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
 277      q - 4 <= q1 <= q
 278   */
 279   umul_ppmm (q1, q0, np[0], dinv2.inv32);
 280   qp[0] = np[0] + q1;
 281
 282   return qh;
 283 }
 284 #endif
 285
 286 /* Put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
 287    with the most significant limb of the quotient as return value (0 or 1).
 288    Assumes the most significant bit of D is set. Clobbers N.
 289
 290    This implements the ShortDiv algorithm from reference [1].
 291 */
 292 #if 1
 293 mp_limb_t
 294 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
 295                 mp_size_t n)
 296 {
 297   mp_size_t k, l;
 298   mp_limb_t qh, cy;
 299   mpfr_limb_ptr tp;
 300   MPFR_TMP_DECL(marker);
 301
 302   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
 303   k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);
 304
 305   if (k == 0)
 306 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
 307   {
 308     mpfr_pi1_t dinv2;
 309     invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
 310     return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
 311   }
 312 #else /* use our own code for base-case short division */
 313     return mpfr_divhigh_n_basecase (qp, np, dp, n);
 314 #endif
 315   else if (k == n)
 316     /* for k=n, we use a division with remainder (mpn_divrem),
 317      which computes the exact quotient */
 318     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
 319
 320   MPFR_ASSERTD ((n+4)/2 <= k && k < n); /* bounds from [1] */
 321   MPFR_TMP_MARK (marker);
 322   l = n - k;
 323   /* first divide the most significant 2k limbs from N by the most significant
 324      k limbs of D */
 325   qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
 326
 327   /* it remains {np,2l+k} = {np,n+l} as remainder */
 328
 329   /* now we have to subtract high(Q1)*D0 where Q1=qh*B^k+{qp+l,k} and
 330      D0={dp,l} */
 331   tp = MPFR_TMP_LIMBS_ALLOC (2 * l);
 332   mpfr_mulhigh_n (tp, qp + k, dp, l);
 333   /* we are only interested in the upper l limbs from {tp,2l} */
 334   cy = mpn_sub_n (np + n, np + n, tp + l, l);
 335   if (qh)
 336     cy += mpn_sub_n (np + n, np + n, dp, l);
 337   while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
 338     {
 339       qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
 340       cy -= mpn_add_n (np + l, np + l, dp, n);
 341     }
 342
 343   /* now it remains {np,n+l} to divide by D */
 344   cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
 345   qh += mpn_add_1 (qp + l, qp + l, k, cy);
 346   MPFR_TMP_FREE(marker);
 347
 348   return qh;
 349 }
 350 #else /* below is the FoldDiv(K) algorithm from [1] */
 351 mp_limb_t
 352 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
 353                 mp_size_t n)
 354 {
 355   mp_size_t k, r;
 356   mpfr_limb_ptr ip, tp, up;
 357   mp_limb_t qh = 0, cy, cc;
 358   int count;
 359   MPFR_TMP_DECL(marker);
 360
 361 #define K 3
 362   if (n < K)
 363     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
 364
 365   k = (n - 1) / K + 1; /* ceil(n/K) */
 366
 367   MPFR_TMP_MARK (marker);
 368   ip = MPFR_TMP_LIMBS_ALLOC (k + 1);
 369   tp = MPFR_TMP_LIMBS_ALLOC (n + k);
 370   up = MPFR_TMP_LIMBS_ALLOC (2 * (k + 1));
 371   mpn_invert (ip, dp + n - (k + 1), k + 1, NULL); /* takes about 13% for n=1000 */
 372   /* {ip, k+1} = floor((B^(2k+2)-1)/D - B^(k+1) where D = {dp+n-(k+1),k+1} */
 373   for (r = n, cc = 0UL; r > 0;)
 374     {
 375       /* cc is the carry at np[n+r] */
 376       MPFR_ASSERTD(cc <= 1);
 377       /* FIXME: why can we have cc as large as say 8? */
 378       count = 0;
 379       while (cc > 0)
 380         {
 381           count ++;
 382           MPFR_ASSERTD(count <= 1);
 383           /* subtract {dp+n-r,r} from {np+n,r} */
 384           cc -= mpn_sub_n (np + n, np + n, dp + n - r, r);
 385           /* add 1 at qp[r] */
 386           qh += mpn_add_1 (qp + r, qp + r, n - r, 1UL);
 387         }
 388       /* it remains r limbs to reduce, i.e., the remainder is {np, n+r} */
 389       if (r < k)
 390         {
 391           ip += k - r;
 392           k = r;
 393         }
 394       /* now r >= k */
 395       /* qp + r - 2 * k -> up */
 396       mpfr_mulhigh_n (up, np + n + r - (k + 1), ip, k + 1);
 397       /* take into account the term B^k in the inverse: B^k * {np+n+r-k, k} */
 398       cy = mpn_add_n (qp + r - k, up + k + 2, np + n + r - k, k);
 399       /* since we need only r limbs of tp (below), it suffices to consider
 400          r high limbs of dp */
 401       if (r > k)
 402         {
 403 #if 0
 404           mpn_mul (tp, dp + n - r, r, qp + r - k, k);
 405 #else  /* use a short product for the low k x k limbs */
 406           /* we know the upper k limbs of the r-limb product cancel with the
 407              remainder, thus we only need to compute the low r-k limbs */
 408           if (r - k >= k)
 409             mpn_mul (tp + k, dp + n - r + k, r - k, qp + r - k, k);
 410           else /* r-k < k */
 411             {
 412 /* #define LOW */
 413 #ifndef LOW
 414               mpn_mul (tp + k, qp + r - k, k, dp + n - r + k, r - k);
 415 #else
 416               mpfr_mullow_n_basecase (tp + k, qp + r - k, dp + n - r + k, r - k);
 417               /* take into account qp[2r-2k] * dp[n - r + k] */
 418               tp[r] += qp[2*r-2*k] * dp[n - r + k];
 419 #endif
 420               /* tp[k..r] is filled */
 421             }
 422 #if 0
 423           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k);
 424 #else /* compute one more limb. FIXME: we could add one limb of dp in the
 425          above, to save one mpn_addmul_1 call */
 426           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k - 1); /* {up,2k-2} */
 427           /* add {qp + r - k, k - 1} * dp[n-r+k-1] */
 428           up[2*k-2] = mpn_addmul_1 (up + k - 1, qp + r - k, k-1, dp[n-r+k-1]);
 429           /* add {dp+n-r, k} * qp[r-1] */
 430           up[2*k-1] = mpn_addmul_1 (up + k - 1, dp + n - r, k, qp[r-1]);
 431 #endif
 432 #ifndef LOW
 433           cc = mpn_add_n (tp + k, tp + k, up + k, k);
 434           mpn_add_1 (tp + 2 * k, tp + 2 * k, r - k, cc);
 435 #else
 436           /* update tp[k..r] */
 437           if (r - k + 1 <= k)
 438             mpn_add_n (tp + k, tp + k, up + k, r - k + 1);
 439           else /* r - k >= k */
 440             {
 441               cc = mpn_add_n (tp + k, tp + k, up + k, k);
 442               mpn_add_1 (tp + 2 * k, tp + 2 * k, r - 2 * k + 1, cc);
 443             }
 444 #endif
 445 #endif
 446         }
 447       else /* last step: since we only want the quotient, no need to update,
 448               just propagate the carry cy */
 449         {
 450           MPFR_ASSERTD(r < n);
 451           if (cy > 0)
 452             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
 453           break;
 454         }
 455       /* subtract {tp, n+k} from {np+r-k, n+k}; however we only want to
 456          update {np+n, n} */
 457       /* we should have tp[r] = np[n+r-k] up to 1 */
 458       MPFR_ASSERTD(tp[r] == np[n + r - k] || tp[r] + 1 == np[n + r - k]);
 459 #ifndef LOW
 460       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r + 1); /* borrow at np[n+r] */
 461 #else
 462       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r - k + 2);
 463 #endif
 464       /* if cy = 1, subtract {dp, n} from {np+r, n}, thus
 465          {dp+n-r,r} from {np+n,r} */
 466       if (cy)
 467         {
 468           if (r < n)
 469             cc += mpn_sub_n (np + n - 1, np + n - 1, dp + n - r - 1, r + 1);
 470           else
 471             cc += mpn_sub_n (np + n, np + n, dp + n - r, r);
 472           /* propagate cy */
 473           if (r == n)
 474             qh = cy;
 475           else
 476             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
 477         }
 478       /* cc is the borrow at np[n+r] */
 479       count = 0;
 480       while (cc > 0) /* quotient was too large */
 481         {
 482           count++;
 483           MPFR_ASSERTD (count <= 1);
 484           cy = mpn_add_n (np + n, np + n, dp + n - (r - k), r - k);
 485           cc -= mpn_add_1 (np + n + r - k, np + n + r - k, k, cy);
 486           qh -= mpn_sub_1 (qp + r - k, qp + r - k, n - (r - k), 1UL);
 487         }
 488       r -= k;
 489       cc = np[n + r];
 490     }
 491   MPFR_TMP_FREE(marker);
 492
 493   return qh;
 494 }
 495 #endif