1 /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
3 Contributed by Paul Zimmermann.
5 Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009
6 Free Software Foundation, Inc.
8 This file is part of the GNU MP Library.
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
15 The GNU MP Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18 License for more details.
20 You should have received a copy of the GNU Lesser General Public License
21 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
31 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
32 t is defined by (tp,mn). */
/* NOTE(review): this listing is elided -- the storage class/return type line,
   braces and local declarations of reduce() are not visible here.  What is
   visible: a temporary quotient buffer is allocated and a truncating division
   is performed; only the remainder (written to tp) is kept by callers. */
34 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
/* Quotient of an an-limb value by an mn-limb value needs an-mn+1 limbs.
   The quotient is deliberately scratch-allocated here (not in the caller)
   so its allocation does not live until the caller returns. */
40 qp = TMP_ALLOC_LIMBS (an - mn + 1);
42 mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
48 /* Return the group order of the ring mod m. */
/* NOTE(review): only one line of this function's body is visible in this
   elided listing.  It computes the quotient q = t / d, presumably inside a
   trial-division loop over candidate factors d of t -- TODO confirm against
   the full source. */
70 unsigned long int q = t / d;
89 /* average number of calls to redc for an exponent of n bits
90 with the sliding window algorithm of base 2^k: the optimal is
91 obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
94 128 156* 159 171 200 261
95 256 309 307* 316 343 403
96 512 617 607* 610 632 688
97 1024 1231 1204 1195* 1207 1256
98 2048 2461 2399 2366 2360* 2396
99 4096 4918 4787 4707 4665* 4670
103 /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC
104 each modular multiplication costs about 2*n^2 limbs operations, whereas
105 using usual reduction it costs 3*K(n), where K(n) is the cost of a
106 multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
107 for example using Burnikel-Ziegler's algorithm. This gives a theoretical
108 threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
110 /* For now, also disable REDC when MOD is even, as the inverse can't handle
111 that. At some point, we might want to make the code faster for that case,
112 perhaps using CRR. */
/* Tunable threshold: REDC is used only when the modulus has fewer than
   POWM_THRESHOLD limbs (see the cost rationale in the comment above).
   Guarded so a build system / tuned header may override it.
   NOTE(review): the matching #endif is not visible in this elided listing. */
114 #ifndef POWM_THRESHOLD
115 #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
/* Compile-time feature switches: negative exponents are handled (via modular
   inverse of the base); exponent reduction by phi(m) is compiled out. */
118 #define HANDLE_NEGATIVE_EXPONENT 1
119 #undef REDUCE_EXPONENT
/* Set r = b^e mod m, using a base-2^k sliding-window exponentiation with an
   odd-power table, and Montgomery (REDC) reduction when the modulus is small
   and odd.  NOTE(review): this listing is heavily elided -- declarations,
   braces and many statements are missing; comments below describe only what
   the visible lines establish.  Under BERKELEY_MP the same body is exposed
   as pow() with the result parameter last. */
123 mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
124 #else /* BERKELEY_MP */
125 pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
126 #endif /* BERKELEY_MP */
/* xp: running result xx; tp: 2*mn-limb product scratch; qp: discarded
   quotients; gp: table of odd powers; this_gp: cursor into gp. */
128 mp_ptr xp, tp, qp, gp, this_gp;
129 mp_srcptr bp, ep, mp;
130 mp_size_t bn, es, en, mn, xn;
132 unsigned long int enb;
133 mp_size_t i, K, j, l, k;
134 int m_zero_cnt, e_zero_cnt;
137 #if HANDLE_NEGATIVE_EXPONENT
157 /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
/* b^0 mod m: result is 1, except when m == 1 (then 1 mod m == 0). */
159 SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
161 TMP_FREE; /* we haven't really allocated anything here */
164 #if HANDLE_NEGATIVE_EXPONENT
/* Negative exponent: replace b by its modular inverse.  mpz_invert fails
   (returns 0) when gcd(b, m) != 1 -- the elided branch presumably raises a
   division-by-zero error; confirm against the full source. */
165 MPZ_TMP_INIT (new_b, mn + 1);
167 if (! mpz_invert (new_b, b, m))
178 /* Reduce exponent by dividing it by phi(m) when m small. */
/* Euler's theorem: for single-limb m, e can be reduced mod phi(m) when the
   exponent is large (> 150 bits here).  Compiled under REDUCE_EXPONENT,
   which is #undef'd above. */
179 if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
181 MPZ_TMP_INIT (new_e, 2);
182 mpz_mod_ui (new_e, e, phi (mp[0]));
/* REDC is used only for small, odd moduli (see POWM_THRESHOLD rationale). */
187 use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
190 /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
191 modlimb_invert (invm, mp[0]);
196 /* Normalize m (i.e. make its most significant bit set) as required by
197 division functions below. */
198 count_leading_zeros (m_zero_cnt, mp[mn - 1]);
199 m_zero_cnt -= GMP_NAIL_BITS;
/* Non-REDC path only (presumably): work with a left-shifted copy of m so
   mpn_tdiv_qr sees a normalized divisor; the shift is undone at the end. */
203 new_mp = TMP_ALLOC_LIMBS (mn);
204 mpn_lshift (new_mp, mp, mn, m_zero_cnt);
209 /* Determine optimal value of k, the number of exponent bits we look at
211 count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
212 e_zero_cnt -= GMP_NAIL_BITS;
213 enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
/* Grow window size k (K = 2^k, presumably) while the cost estimate from the
   table above keeps improving. */
216 while (2 * enb > K * (2 + k * (3 + k)))
220 if (k == 10) /* cap allocation */
/* Scratch: tp holds 2*mn-limb products, qp the (discarded) quotients. */
224 tp = TMP_ALLOC_LIMBS (2 * mn);
225 qp = TMP_ALLOC_LIMBS (mn + 1);
/* Table of K/2 odd powers of the base, mn limbs each; heap-allocated (not
   TMP) and explicitly freed at function end. */
227 gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
229 /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */
232 /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary
233 for speed or correctness to do this when b and m have the same number of
234 limbs, perhaps remove mpn_cmp call. */
235 if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
237 /* Reduce possibly huge base while moving it to gp[0]. Use a function
238 call to reduce, since we don't want the quotient allocation to
239 live until function return. */
/* REDC path (presumably): b mod m goes to the high half of tp, so that
   tp = (b mod m) * 2^(mn*GMP_NUMB_BITS); the division below then leaves
   b * R^n mod m (Montgomery form) in gp. */
242 reduce (tp + mn, bp, bn, mp, mn); /* b mod m */
244 mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormalized! */
/* Non-REDC path: plain b mod m into gp[0]. */
248 reduce (gp, bp, bn, mp, mn);
253 /* |b| < m. We pad out operands to become mn limbs, which simplifies
254 the rest of the function, but slows things down when |b| << m. */
/* REDC path: place zero-padded b in the high half of tp (i.e. b * R^n) and
   divide to obtain the Montgomery form in gp. */
258 MPN_COPY (tp + mn, bp, bn);
259 MPN_ZERO (tp + mn + bn, mn - bn);
260 mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
/* Non-REDC path: just zero-pad b to mn limbs. */
264 MPN_COPY (gp, bp, bn);
265 MPN_ZERO (gp + bn, mn - bn);
269 /* Compute xx^i for odd g < 2^i. */
/* Precompute the odd-power table: xp = x^2 first, then
   g[i] = g[i-1] * x^2 = x^(2i+1), via REDC or plain division depending on
   use_redc (the selecting if/else is elided here). */
271 xp = TMP_ALLOC_LIMBS (mn);
272 mpn_sqr_n (tp, gp, mn);
274 mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */
276 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
278 for (i = 1; i < K / 2; i++)
280 mpn_mul_n (tp, this_gp, xp, mn);
283 mpn_redc_1 (this_gp, tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */
285 mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
288 /* Start the real stuff. */
/* Scan the exponent from the most significant end, k bits at a time;
   c is the current window value, sh the bit index within limb ep[i]. */
290 i = en - 1; /* current index */
291 c = ep[i]; /* current limb */
292 sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */
293 sh -= k; /* index of lower bit of ep[i] to take into account */
295 { /* k-sh extra bits are needed */
/* First window: strip trailing zero bits of c (they are squarings applied
   later), then seed xx from the table entry for the odd value c. */
307 for (j = 0; c % 2 == 0; j++)
310 MPN_COPY (xp, gp + mn * (c >> 1), mn);
/* Re-apply the j stripped bits as squarings of xx. */
313 mpn_sqr_n (tp, xp, mn);
315 mpn_redc_1 (xp, tp, mp, mn, invm);
317 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
/* Main loop: while exponent bits remain. */
320 while (i > 0 || sh > 0)
323 l = k; /* number of bits treated */
336 l += sh; /* last chunk of bits from e; l < k */
/* Keep only the l low bits of the window. */
341 c &= ((mp_limb_t) 1 << l) - 1;
343 /* This while loop implements the sliding window improvement--loop while
344 the most significant bit of c is zero, squaring xx as we go. */
345 while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
347 mpn_sqr_n (tp, xp, mn);
349 mpn_redc_1 (xp, tp, mp, mn, invm);
351 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
/* Slide: shift the next exponent bit into c, crossing a limb boundary when
   sh runs out (the elided branch resets i and sh). */
355 c = (c << 1) + ((ep[i] >> sh) & 1);
360 sh = GMP_NUMB_BITS - 1;
361 c = (c << 1) + (ep[i] >> sh);
365 /* Replace xx by xx^(2^l)*x^c. */
/* Strip trailing zero bits of c again so the table lookup uses an odd
   index; they become extra squarings afterwards. */
368 for (j = 0; c % 2 == 0; j++)
371 /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
/* l - j squarings of xx ... */
375 mpn_sqr_n (tp, xp, mn);
377 mpn_redc_1 (xp, tp, mp, mn, invm);
379 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
/* ... then one multiply by the table entry x^c (odd c) ... */
381 mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
383 mpn_redc_1 (xp, tp, mp, mn, invm);
385 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
388 j = l; /* case c=0 */
/* ... and the remaining j squarings. */
391 mpn_sqr_n (tp, xp, mn);
393 mpn_redc_1 (xp, tp, mp, mn, invm);
395 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
401 /* Convert back xx to xx/R^n. */
/* One REDC on (xx, 0-padding) divides out the Montgomery factor R^n. */
402 MPN_COPY (tp, xp, mn);
403 MPN_ZERO (tp + mn, mn);
404 mpn_redc_1 (xp, tp, mp, mn, invm);
/* REDC result lies in [0, 2m); a single conditional subtract canonicalizes. */
405 if (mpn_cmp (xp, mp, mn) >= 0)
406 mpn_sub_n (xp, xp, mp, mn);
/* Non-REDC path: undo the normalization shift of m -- shift xx up, reduce
   by the (shifted) modulus, shift back down. */
413 cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
415 mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
416 mpn_rshift (xp, xp, mn, m_zero_cnt);
420 MPN_NORMALIZE (xp, xn);
/* Negative base with odd exponent gives a negative power; fold back into
   [0, m) as m - |result| (skip when the result is already zero). */
422 if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
424 mp = PTR(m); /* want original, unnormalized m */
425 mpn_sub (xp, mp, mn, xp, xn);
427 MPN_NORMALIZE (xp, xn);
/* Store the result and release the heap-allocated odd-power table. */
431 MPN_COPY (PTR(r), xp, xn);
433 __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);