contrib/gmp/mpn/generic/rootrem.c

   1 /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   2    store the truncated integer part at rootp and the remainder at remp.
   3
   4    Contributed by Paul Zimmermann (algorithm) and
   5    Paul Zimmermann and Torbjorn Granlund (implementation).
   6
   7    THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
   8    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
   9    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  10
  11 Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
  12
  13 This file is part of the GNU MP Library.
  14
  15 The GNU MP Library is free software; you can redistribute it and/or modify
  16 it under the terms of the GNU Lesser General Public License as published by
  17 the Free Software Foundation; either version 3 of the License, or (at your
  18 option) any later version.
  19
  20 The GNU MP Library is distributed in the hope that it will be useful, but
  21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  23 License for more details.
  24
  25 You should have received a copy of the GNU Lesser General Public License
  26 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  27
  28 /* FIXME:
  29      This implementation is not optimal when remp == NULL, since the complexity
  30      is M(n), whereas it should be M(n/k) on average.
  31 */
  32
  33 #include <stdio.h>              /* for NULL */
  34
  35 #include "gmp.h"
  36 #include "gmp-impl.h"
  37 #include "longlong.h"
  38
  39 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
  40                                        mp_limb_t, int);
  41
  42 #define MPN_RSHIFT(cy,rp,up,un,cnt) \
  43   do {                                                                  \
  44     if ((cnt) != 0)                                                     \
  45       cy = mpn_rshift (rp, up, un, cnt);                                \
  46     else                                                                \
  47       {                                                                 \
  48         MPN_COPY_INCR (rp, up, un);                                     \
  49         cy = 0;                                                         \
  50       }                                                                 \
  51   } while (0)
  52
  53 #define MPN_LSHIFT(cy,rp,up,un,cnt) \
  54   do {                                                                  \
  55     if ((cnt) != 0)                                                     \
  56       cy = mpn_lshift (rp, up, un, cnt);                                \
  57     else                                                                \
  58       {                                                                 \
  59         MPN_COPY_DECR (rp, up, un);                                     \
  60         cy = 0;                                                         \
  61       }                                                                 \
  62   } while (0)
  63
  64
  65 /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
  66    If remp <> NULL, put in {remp, un} the remainder.
  67    Return the size (in limbs) of the remainder if remp <> NULL,
  68           or a non-zero value iff the remainder is non-zero when remp = NULL.
  69    Assumes:
  70    (a) up[un-1] is not zero
  71    (b) rootp has at least space for ceil(un/k) limbs
  72    (c) remp has at least space for un limbs (in case remp <> NULL)
  73    (d) the operands do not overlap.
  74
  75    The auxiliary memory usage is 3*un+2 if remp = NULL,
  76    and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
  77 */
  78 mp_size_t
  79 mpn_rootrem (mp_ptr rootp, mp_ptr remp,
  80              mp_srcptr up, mp_size_t un, mp_limb_t k)
  81 {
  82   ASSERT (un > 0);
  83   ASSERT (up[un - 1] != 0);
  84   ASSERT (k > 1);
  85
  86   if ((remp == NULL) && (un / k > 2))
  87     /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
  88        which will produce an approximate root with one more limb,
  89        so that in most cases we can conclude. */
  90     {
  91       mp_ptr sp, wp;
  92       mp_size_t rn, sn, wn;
  93       TMP_DECL;
  94       TMP_MARK;
  95       wn = un + k;
  96       wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
  97       sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
  98       sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
  99       MPN_COPY (wp + k, up, un);
 100       MPN_ZERO (wp, k);
 101       rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
 102       /* the approximate root S = {sp,sn} is either the correct root of
 103          {sp,sn}, or one too large. Thus unless the least significant limb
 104          of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
 105          one limb. (In case sp[0]=1, we can deduce the root, but not decide
 106          whether it is exact or not.) */
 107       MPN_COPY (rootp, sp + 1, sn - 1);
 108       TMP_FREE;
 109       return rn;
 110     }
 111   else /* remp <> NULL */
 112     {
 113       return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
 114     }
 115 }
 116
 117 /* if approx is non-zero, does not compute the final remainder */
 118 static mp_size_t
 119 mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
 120                       mp_limb_t k, int approx)
 121 {
 122   mp_ptr qp, rp, sp, wp, scratch;
 123   mp_size_t qn, rn, sn, wn, nl, bn;
 124   mp_limb_t save, save2, cy;
 125   unsigned long int unb; /* number of significant bits of {up,un} */
 126   unsigned long int xnb; /* number of significant bits of the result */
 127   unsigned int cnt;
 128   unsigned long b, kk;
 129   unsigned long sizes[GMP_NUMB_BITS + 1];
 130   int ni, i;
 131   int c;
 132   int logk;
 133   TMP_DECL;
 134
 135   TMP_MARK;
 136
 137   /* qp and wp need enough space to store S'^k where S' is an approximate
 138      root. Since S' can be as large as S+2, the worst case is when S=2 and
 139      S'=4. But then since we know the number of bits of S in advance, S'
 140      can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
 141      So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
 142      fits in un limbs, the number of extra limbs needed is bounded by
 143      ceil(k*log2(3/2)/GMP_NUMB_BITS). */
 144 #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
 145   qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
 146                                         of R/(k*S^(k-1)), and S^k */
 147   if (remp == NULL)
 148     {
 149       rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
 150       scratch = rp;                      /* used by mpn_div_q */
 151     }
 152   else
 153     {
 154       scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
 155       rp = remp;
 156     }
 157   sp = rootp;
 158   wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
 159                                         and temporary for mpn_pow_1 */
 160   count_leading_zeros (cnt, up[un - 1]);
 161   unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
 162   /* unb is the number of bits of the input U */
 163
 164   xnb = (unb - 1) / k + 1;      /* ceil (unb / k) */
 165   /* xnb is the number of bits of the root R */
 166
 167   if (xnb == 1) /* root is 1 */
 168     {
 169       if (remp == NULL)
 170         remp = rp;
 171       mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
 172       MPN_NORMALIZE (remp, un); /* There should be at most one zero limb,
 173                                    if we demand u to be normalized  */
 174       rootp[0] = 1;
 175       TMP_FREE;
 176       return un;
 177     }
 178
 179   /* We initialize the algorithm with a 1-bit approximation to zero: since we
 180      know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
 181      r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
 182   kk = k * (xnb - 1);           /* number of truncated bits in the input */
 183   rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
 184   MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
 185   mpn_sub_1 (rp, rp, rn, 1);    /* subtract the initial approximation: since
 186                                    the non-truncated part is less than 2^k, it
 187                                    is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
 188   sp[0] = 1;                    /* initial approximation */
 189   sn = 1;                       /* it has one limb */
 190
 191   for (logk = 1; ((k - 1) >> logk) != 0; logk++)
 192     ;
 193   /* logk = ceil(log(k)/log(2)) */
 194
 195   b = xnb - 1; /* number of remaining bits to determine in the kth root */
 196   ni = 0;
 197   while (b != 0)
 198     {
 199       /* invariant: here we want b+1 total bits for the kth root */
 200       sizes[ni] = b;
 201       /* if c is the new value of b, this means that we'll go from a root
 202          of c+1 bits (say s') to a root of b+1 bits.
 203          It is proved in the book "Modern Computer Arithmetic" from Brent
 204          and Zimmermann, Chapter 1, that
 205          if s' >= k*beta, then at most one correction is necessary.
 206          Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
 207          c >= ceil((b + log2(k))/2). */
 208       b = (b + logk + 1) / 2;
 209       if (b >= sizes[ni])
 210         b = sizes[ni] - 1;      /* add just one bit at a time */
 211       ni++;
 212     }
 213   sizes[ni] = 0;
 214   ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
 215   /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
 216      sizes[i] <= 2 * sizes[i+1].
 217      Newton iteration will first compute sizes[ni-1] extra bits,
 218      then sizes[ni-2], ..., then sizes[0] = b. */
 219
 220   wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
 221   wn = 1;
 222   for (i = ni; i != 0; i--)
 223     {
 224       /* 1: loop invariant:
 225          {sp, sn} is the current approximation of the root, which has
 226                   exactly 1 + sizes[ni] bits.
 227          {rp, rn} is the current remainder
 228          {wp, wn} = {sp, sn}^(k-1)
 229          kk = number of truncated bits of the input
 230       */
 231       b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
 232                                       iteration */
 233
 234       /* Reinsert a low zero limb if we normalized away the entire remainder */
 235       if (rn == 0)
 236         {
 237           rp[0] = 0;
 238           rn = 1;
 239         }
 240
 241       /* first multiply the remainder by 2^b */
 242       MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
 243       rn = rn + b / GMP_NUMB_BITS;
 244       if (cy != 0)
 245         {
 246           rp[rn] = cy;
 247           rn++;
 248         }
 249
 250       kk = kk - b;
 251
 252       /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
 253
 254       /* Now insert bits [kk,kk+b-1] from the input U */
 255       bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
 256       save = rp[bn];
 257       /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
 258       nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
 259       /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
 260                  - floor(kk / GMP_NUMB_BITS)
 261              <= 1 + (kk + b - 1) / GMP_NUMB_BITS
 262                   - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
 263              = 2 + (b - 2) / GMP_NUMB_BITS
 264          thus since nl is an integer:
 265          nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
 266       /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
 267       if (nl - 1 > bn)
 268         save2 = rp[bn + 1];
 269       MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
 270       /* set to zero high bits of rp[bn] */
 271       rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
 272       /* restore corresponding bits */
 273       rp[bn] |= save;
 274       if (nl - 1 > bn)
 275         rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
 276                                they start by bit 0 in rp[0], so they use
 277                                at most ceil(b/GMP_NUMB_BITS) limbs */
 278
 279       /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
 280
 281       /* compute {wp, wn} = k * {sp, sn}^(k-1) */
 282       cy = mpn_mul_1 (wp, wp, wn, k);
 283       wp[wn] = cy;
 284       wn += cy != 0;
 285
 286       /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
 287
 288       /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
 289       if (rn < wn)
 290         {
 291           qn = 0;
 292         }
 293       else
 294         {
 295           mp_ptr tp;
 296           qn = rn - wn; /* expected quotient size */
 297           /* tp must have space for wn limbs.
 298              The quotient needs rn-wn+1 limbs, thus quotient+remainder
 299              need altogether rn+1 limbs. */
 300           tp = qp + qn + 1;     /* put remainder in Q buffer */
 301           mpn_div_q (qp, rp, rn, wp, wn, scratch);
 302           qn += qp[qn] != 0;
 303         }
 304
 305       /* 5: current buffers: {sp,sn}, {qp,qn}.
 306          Note: {rp,rn} is not needed any more since we'll compute it from
 307          scratch at the end of the loop.
 308        */
 309
 310       /* Number of limbs used by b bits, when least significant bit is
 311          aligned to least limb */
 312       bn = (b - 1) / GMP_NUMB_BITS + 1;
 313
 314       /* the quotient should be smaller than 2^b, since the previous
 315          approximation was correctly rounded toward zero */
 316       if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
 317                       qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
 318         {
 319           qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
 320           MPN_ZERO (qp, qn);
 321           qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
 322           MPN_DECR_U (qp, qn, 1);
 323           qn -= qp[qn - 1] == 0;
 324         }
 325
 326       /* 6: current buffers: {sp,sn}, {qp,qn} */
 327
 328       /* multiply the root approximation by 2^b */
 329       MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
 330       sn = sn + b / GMP_NUMB_BITS;
 331       if (cy != 0)
 332         {
 333           sp[sn] = cy;
 334           sn++;
 335         }
 336
 337       /* 7: current buffers: {sp,sn}, {qp,qn} */
 338
 339       ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
 340                                    above, q is set to 2^b-1, which has
 341                                    exactly bn limbs */
 342
 343       /* Combine sB and q to form sB + q.  */
 344       save = sp[b / GMP_NUMB_BITS];
 345       MPN_COPY (sp, qp, qn);
 346       MPN_ZERO (sp + qn, bn - qn);
 347       sp[b / GMP_NUMB_BITS] |= save;
 348
 349       /* 8: current buffer: {sp,sn} */
 350
 351       /* Since each iteration treats b bits from the root and thus k*b bits
 352          from the input, and we already considered b bits from the input,
 353          we now have to take another (k-1)*b bits from the input. */
 354       kk -= (k - 1) * b; /* remaining input bits */
 355       /* {rp, rn} = floor({up, un} / 2^kk) */
 356       MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
 357       rn = un - kk / GMP_NUMB_BITS;
 358       rn -= rp[rn - 1] == 0;
 359
 360       /* 9: current buffers: {sp,sn}, {rp,rn} */
 361
 362      for (c = 0;; c++)
 363         {
 364           /* Compute S^k in {qp,qn}. */
 365           if (i == 1)
 366             {
 367               /* Last iteration: we don't need W anymore. */
 368               /* mpn_pow_1 requires that both qp and wp have enough space to
 369                  store the result {sp,sn}^k + 1 limb */
 370               approx = approx && (sp[0] > 1);
 371               qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
 372             }
 373           else
 374             {
 375               /* W <- S^(k-1) for the next iteration,
 376                  and S^k = W * S. */
 377               wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
 378               mpn_mul (qp, wp, wn, sp, sn);
 379               qn = wn + sn;
 380               qn -= qp[qn - 1] == 0;
 381             }
 382
 383           /* if S^k > floor(U/2^kk), the root approximation was too large */
 384           if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
 385             MPN_DECR_U (sp, sn, 1);
 386           else
 387             break;
 388         }
 389
 390       /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
 391
 392       ASSERT_ALWAYS (c <= 1);
 393       ASSERT_ALWAYS (rn >= qn);
 394
 395       /* R = R - Q = floor(U/2^kk) - S^k */
 396       if ((i > 1) || (approx == 0))
 397         {
 398           mpn_sub (rp, rp, rn, qp, qn);
 399           MPN_NORMALIZE (rp, rn);
 400         }
 401       /* otherwise we have rn > 0, thus the return value is ok */
 402
 403       /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
 404     }
 405
 406   TMP_FREE;
 407   return rn;
 408 }