contrib/gmp/mpz/powm.c

   1 /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
   2
   3    Contributed by Paul Zimmermann.
   4
   5 Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009
   6 Free Software Foundation, Inc.
   7
   8 This file is part of the GNU MP Library.
   9
  10 The GNU MP Library is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU Lesser General Public License as published by
  12 the Free Software Foundation; either version 3 of the License, or (at your
  13 option) any later version.
  14
  15 The GNU MP Library is distributed in the hope that it will be useful, but
  16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  17 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  18 License for more details.
  19
  20 You should have received a copy of the GNU Lesser General Public License
  21 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  22
  23
  24 #include "gmp.h"
  25 #include "gmp-impl.h"
  26 #include "longlong.h"
  27 #ifdef BERKELEY_MP
  28 #include "mp.h"
  29 #endif
  30
  31 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
  32    t is defined by (tp,mn).  */
  33 static void
  34 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
  35 {
  36   mp_ptr qp;
  37   TMP_DECL;
  38
  39   TMP_MARK;
  40   qp = TMP_ALLOC_LIMBS (an - mn + 1);
  41
  42   mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
  43
  44   TMP_FREE;
  45 }
  46
  47 #if REDUCE_EXPONENT
  48 /* Return the group order of the ring mod m.  */
  49 static mp_limb_t
  50 phi (mp_limb_t t)
  51 {
  52   mp_limb_t d, m, go;
  53
  54   go = 1;
  55
  56   if (t % 2 == 0)
  57     {
  58       t = t / 2;
  59       while (t % 2 == 0)
  60         {
  61           go *= 2;
  62           t = t / 2;
  63         }
  64     }
  65   for (d = 3;; d += 2)
  66     {
  67       m = d - 1;
  68       for (;;)
  69         {
  70           unsigned long int q = t / d;
  71           if (q < d)
  72             {
  73               if (t <= 1)
  74                 return go;
  75               if (t == d)
  76                 return go * m;
  77               return go * (t - 1);
  78             }
  79           if (t != q * d)
  80             break;
  81           go *= m;
  82           m = d;
  83           t = q;
  84         }
  85     }
  86 }
  87 #endif
  88
  89 /* average number of calls to redc for an exponent of n bits
  90    with the sliding window algorithm of base 2^k: the optimal is
  91    obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
  92
  93    n\k    4     5     6     7     8
  94    128    156*  159   171   200   261
  95    256    309   307*  316   343   403
  96    512    617   607*  610   632   688
  97    1024   1231  1204  1195* 1207  1256
  98    2048   2461  2399  2366  2360* 2396
  99    4096   4918  4787  4707  4665* 4670
 100 */
 101 \f
 102
 103 /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.  In REDC
 104    each modular multiplication costs about 2*n^2 limbs operations, whereas
 105    using usual reduction it costs 3*K(n), where K(n) is the cost of a
 106    multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
 107    for example using Burnikel-Ziegler's algorithm. This gives a theoretical
 108    threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
 109    2.66.  */
 110 /* For now, also disable REDC when MOD is even, as the inverse can't handle
 111    that.  At some point, we might want to make the code faster for that case,
 112    perhaps using CRR.  */
 113
 114 #ifndef POWM_THRESHOLD
 115 #define POWM_THRESHOLD  ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
 116 #endif
 117
 118 #define HANDLE_NEGATIVE_EXPONENT 1
 119 #undef REDUCE_EXPONENT
 120
 121 void
 122 #ifndef BERKELEY_MP
 123 mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
 124 #else /* BERKELEY_MP */
 125 pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
 126 #endif /* BERKELEY_MP */
 127 {
 128   mp_ptr xp, tp, qp, gp, this_gp;
 129   mp_srcptr bp, ep, mp;
 130   mp_size_t bn, es, en, mn, xn;
 131   mp_limb_t invm, c;
 132   unsigned long int enb;
 133   mp_size_t i, K, j, l, k;
 134   int m_zero_cnt, e_zero_cnt;
 135   int sh;
 136   int use_redc;
 137 #if HANDLE_NEGATIVE_EXPONENT
 138   mpz_t new_b;
 139 #endif
 140 #if REDUCE_EXPONENT
 141   mpz_t new_e;
 142 #endif
 143   TMP_DECL;
 144
 145   mp = PTR(m);
 146   mn = ABSIZ (m);
 147   if (mn == 0)
 148     DIVIDE_BY_ZERO;
 149
 150   TMP_MARK;
 151
 152   es = SIZ (e);
 153   if (es <= 0)
 154     {
 155       if (es == 0)
 156         {
 157           /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
 158              m equals 1.  */
 159           SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
 160           PTR(r)[0] = 1;
 161           TMP_FREE;     /* we haven't really allocated anything here */
 162           return;
 163         }
 164 #if HANDLE_NEGATIVE_EXPONENT
 165       MPZ_TMP_INIT (new_b, mn + 1);
 166
 167       if (! mpz_invert (new_b, b, m))
 168         DIVIDE_BY_ZERO;
 169       b = new_b;
 170       es = -es;
 171 #else
 172       DIVIDE_BY_ZERO;
 173 #endif
 174     }
 175   en = es;
 176
 177 #if REDUCE_EXPONENT
 178   /* Reduce exponent by dividing it by phi(m) when m small.  */
 179   if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
 180     {
 181       MPZ_TMP_INIT (new_e, 2);
 182       mpz_mod_ui (new_e, e, phi (mp[0]));
 183       e = new_e;
 184     }
 185 #endif
 186
 187   use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
 188   if (use_redc)
 189     {
 190       /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
 191       modlimb_invert (invm, mp[0]);
 192       invm = -invm;
 193     }
 194   else
 195     {
 196       /* Normalize m (i.e. make its most significant bit set) as required by
 197          division functions below.  */
 198       count_leading_zeros (m_zero_cnt, mp[mn - 1]);
 199       m_zero_cnt -= GMP_NAIL_BITS;
 200       if (m_zero_cnt != 0)
 201         {
 202           mp_ptr new_mp;
 203           new_mp = TMP_ALLOC_LIMBS (mn);
 204           mpn_lshift (new_mp, mp, mn, m_zero_cnt);
 205           mp = new_mp;
 206         }
 207     }
 208
 209   /* Determine optimal value of k, the number of exponent bits we look at
 210      at a time.  */
 211   count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
 212   e_zero_cnt -= GMP_NAIL_BITS;
 213   enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
 214   k = 1;
 215   K = 2;
 216   while (2 * enb > K * (2 + k * (3 + k)))
 217     {
 218       k++;
 219       K *= 2;
 220       if (k == 10)                      /* cap allocation */
 221         break;
 222     }
 223
 224   tp = TMP_ALLOC_LIMBS (2 * mn);
 225   qp = TMP_ALLOC_LIMBS (mn + 1);
 226
 227   gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
 228
 229   /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
 230   bn = ABSIZ (b);
 231   bp = PTR(b);
 232   /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
 233      for speed or correctness to do this when b and m have the same number of
 234      limbs, perhaps remove mpn_cmp call.  */
 235   if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
 236     {
 237       /* Reduce possibly huge base while moving it to gp[0].  Use a function
 238          call to reduce, since we don't want the quotient allocation to
 239          live until function return.  */
 240       if (use_redc)
 241         {
 242           reduce (tp + mn, bp, bn, mp, mn);     /* b mod m */
 243           MPN_ZERO (tp, mn);
 244           mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
 245         }
 246       else
 247         {
 248           reduce (gp, bp, bn, mp, mn);
 249         }
 250     }
 251   else
 252     {
 253       /* |b| < m.  We pad out operands to become mn limbs,  which simplifies
 254          the rest of the function, but slows things down when |b| << m.  */
 255       if (use_redc)
 256         {
 257           MPN_ZERO (tp, mn);
 258           MPN_COPY (tp + mn, bp, bn);
 259           MPN_ZERO (tp + mn + bn, mn - bn);
 260           mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
 261         }
 262       else
 263         {
 264           MPN_COPY (gp, bp, bn);
 265           MPN_ZERO (gp + bn, mn - bn);
 266         }
 267     }
 268
 269   /* Compute xx^i for odd g < 2^i.  */
 270
 271   xp = TMP_ALLOC_LIMBS (mn);
 272   mpn_sqr_n (tp, gp, mn);
 273   if (use_redc)
 274     mpn_redc_1 (xp, tp, mp, mn, invm);          /* xx = x^2*R^n */
 275   else
 276     mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 277   this_gp = gp;
 278   for (i = 1; i < K / 2; i++)
 279     {
 280       mpn_mul_n (tp, this_gp, xp, mn);
 281       this_gp += mn;
 282       if (use_redc)
 283         mpn_redc_1 (this_gp, tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */
 284       else
 285         mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
 286     }
 287
 288   /* Start the real stuff.  */
 289   ep = PTR (e);
 290   i = en - 1;                           /* current index */
 291   c = ep[i];                            /* current limb */
 292   sh = GMP_NUMB_BITS - e_zero_cnt;      /* significant bits in ep[i] */
 293   sh -= k;                              /* index of lower bit of ep[i] to take into account */
 294   if (sh < 0)
 295     {                                   /* k-sh extra bits are needed */
 296       if (i > 0)
 297         {
 298           i--;
 299           c <<= (-sh);
 300           sh += GMP_NUMB_BITS;
 301           c |= ep[i] >> sh;
 302         }
 303     }
 304   else
 305     c >>= sh;
 306
 307   for (j = 0; c % 2 == 0; j++)
 308     c >>= 1;
 309
 310   MPN_COPY (xp, gp + mn * (c >> 1), mn);
 311   while (--j >= 0)
 312     {
 313       mpn_sqr_n (tp, xp, mn);
 314       if (use_redc)
 315         mpn_redc_1 (xp, tp, mp, mn, invm);
 316       else
 317         mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 318     }
 319
 320   while (i > 0 || sh > 0)
 321     {
 322       c = ep[i];
 323       l = k;                            /* number of bits treated */
 324       sh -= l;
 325       if (sh < 0)
 326         {
 327           if (i > 0)
 328             {
 329               i--;
 330               c <<= (-sh);
 331               sh += GMP_NUMB_BITS;
 332               c |= ep[i] >> sh;
 333             }
 334           else
 335             {
 336               l += sh;                  /* last chunk of bits from e; l < k */
 337             }
 338         }
 339       else
 340         c >>= sh;
 341       c &= ((mp_limb_t) 1 << l) - 1;
 342
 343       /* This while loop implements the sliding window improvement--loop while
 344          the most significant bit of c is zero, squaring xx as we go. */
 345       while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
 346         {
 347           mpn_sqr_n (tp, xp, mn);
 348           if (use_redc)
 349             mpn_redc_1 (xp, tp, mp, mn, invm);
 350           else
 351             mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 352           if (sh != 0)
 353             {
 354               sh--;
 355               c = (c << 1) + ((ep[i] >> sh) & 1);
 356             }
 357           else
 358             {
 359               i--;
 360               sh = GMP_NUMB_BITS - 1;
 361               c = (c << 1) + (ep[i] >> sh);
 362             }
 363         }
 364
 365       /* Replace xx by xx^(2^l)*x^c.  */
 366       if (c != 0)
 367         {
 368           for (j = 0; c % 2 == 0; j++)
 369             c >>= 1;
 370
 371           /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
 372           l -= j;
 373           while (--l >= 0)
 374             {
 375               mpn_sqr_n (tp, xp, mn);
 376               if (use_redc)
 377                 mpn_redc_1 (xp, tp, mp, mn, invm);
 378               else
 379                 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 380             }
 381           mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
 382           if (use_redc)
 383             mpn_redc_1 (xp, tp, mp, mn, invm);
 384           else
 385             mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 386         }
 387       else
 388         j = l;                          /* case c=0 */
 389       while (--j >= 0)
 390         {
 391           mpn_sqr_n (tp, xp, mn);
 392           if (use_redc)
 393             mpn_redc_1 (xp, tp, mp, mn, invm);
 394           else
 395             mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
 396         }
 397     }
 398
 399   if (use_redc)
 400     {
 401       /* Convert back xx to xx/R^n.  */
 402       MPN_COPY (tp, xp, mn);
 403       MPN_ZERO (tp + mn, mn);
 404       mpn_redc_1 (xp, tp, mp, mn, invm);
 405       if (mpn_cmp (xp, mp, mn) >= 0)
 406         mpn_sub_n (xp, xp, mp, mn);
 407     }
 408   else
 409     {
 410       if (m_zero_cnt != 0)
 411         {
 412           mp_limb_t cy;
 413           cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
 414           tp[mn] = cy;
 415           mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
 416           mpn_rshift (xp, xp, mn, m_zero_cnt);
 417         }
 418     }
 419   xn = mn;
 420   MPN_NORMALIZE (xp, xn);
 421
 422   if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
 423     {
 424       mp = PTR(m);                      /* want original, unnormalized m */
 425       mpn_sub (xp, mp, mn, xp, xn);
 426       xn = mn;
 427       MPN_NORMALIZE (xp, xn);
 428     }
 429   MPZ_REALLOC (r, xn);
 430   SIZ (r) = xn;
 431   MPN_COPY (PTR(r), xp, xn);
 432
 433   __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
 434   TMP_FREE;
 435 }