1 /* mpn_powm -- Compute R = U^E mod M.
3 Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
22 BASIC ALGORITHM, Compute b^e mod n, where n is odd.
26 2. While w^2 < n (and there are more bits in e)
27 w <- power left-to-right base-2 without reduction
29 3. t <- (B^n * b) mod n Convert to REDC form
31 4. Compute power table of e-dependent size
33 5. While there are more bits in e
34 w <- power left-to-right base-k with reduction
39 * Make getbits a macro, thereby allowing it to update the index operand.
40 That will simplify the code using getbits. (Perhaps make getbits' sibling
41 getbit then have similar form, for symmetry.)
43 * Write an itch function.
45 * Choose window size without looping. (Superoptimize or think(tm).)
47 * How do we handle small bases?
49 * This is slower than old mpz code, in particular if we base it on redc_1
50 (use: #undef HAVE_NATIVE_mpn_addmul_2). Why?
52 * Make it sub-quadratic.
54 * Call new division functions, not mpn_tdiv_qr.
56 * Is redc obsolete with improved SB division?
58 * Consider special code for one-limb M.
60 * CRT for N = odd*2^t:
61 Using Newton's method and 2-adic arithmetic:
62 m1_inv_m2 = 1/odd mod 2^t
63 Plain 2-adic (REDC) modexp:
65 Mullo+sqrlo-based modexp:
68 r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1
70 * How should we handle the redc1/redc2/redc3/redc4/redc_subquad choice?
71 - redc1: T(binvert_1limb) + e * (n) * (T(mullo1x1) + n*T(addmul_1))
72 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2))
73 - redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3))
74 This disregards the addmul_N constant term, but we could think of
75 that as part of the respective mulloNxN.
/* getbit(p, bi): extract one bit of the limb array p.  Bits are numbered
   from 1, i.e. bi == 1 selects the least significant bit of p[0]; bi must
   therefore be >= 1.  The result is 0 or 1, in the limb type of p.

   Both macro parameters are fully parenthesized so that expression
   arguments (e.g. `ptr + 1' or `a | b') expand with the intended
   precedence.  bi is evaluated twice, so it must have no side effects.  */
#define getbit(p,bi) \
  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> (((bi) - 1) % GMP_LIMB_BITS)) & 1)
/* getbits: return a field of exponent bits taken from the limb array p.
   In the general path visible below, the field is the nbits bits whose
   0-based positions are bi-nbits .. bi-1, returned right-aligned.
   NOTE(review): this listing appears to have lost lines (braces, the local
   declarations of nbits_in_r, r and i, and the test that chooses between
   the two return paths); confirm the control flow against the complete
   file before relying on these notes.  */
86 static inline mp_limb_t
87 getbits (const mp_limb_t *p, unsigned long bi, int nbits)
/* Short path: keep only the low bi bits of the lowest limb.  Presumably
   taken when fewer than nbits bits remain -- the guard is not visible.  */
95 return p[0] & (((mp_limb_t) 1 << bi) - 1);
99 bi -= nbits; /* bit index of low bit to extract */
100 i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */
101 bi %= GMP_LIMB_BITS; /* bit index in low word */
102 r = p[i] >> bi; /* extract (low) bits */
103 nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
104 if (nbits_in_r < nbits) /* did we get enough bits? */
105 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
/* Discard anything above the requested nbits bits.  */
106 return r & (((mp_limb_t ) 1 << nbits) - 1);
/* Configuration of the REDC variant.  The #undef makes the following
   #ifndef true unconditionally, so REDC_2_THRESHOLD becomes MP_SIZE_T_MAX
   here; the threshold is later compared against n with BELOW_THRESHOLD.
   NOTE(review): the #endif lines closing these conditionals are not visible
   in this listing.  */
110 #undef HAVE_NATIVE_mpn_addmul_2
112 #ifndef HAVE_NATIVE_mpn_addmul_2
113 #define REDC_2_THRESHOLD MP_SIZE_T_MAX
/* Fallback value, used only if no REDC_2_THRESHOLD has been defined yet.  */
116 #ifndef REDC_2_THRESHOLD
117 #define REDC_2_THRESHOLD 4
/* Placeholder for the n-limb REDC routine: its whole body is
   ASSERT_ALWAYS(0), so it must never actually be reached.  It is declared
   with an empty (unprototyped) parameter list, which lets the five-argument
   call in MPN_REDC_X compile against it.  */
120 static void mpn_redc_n () {ASSERT_ALWAYS(0);}
/* win_size: pick the window size for an exponent of eb bits (mpn_powm stores
   the result in windowsize).  The loop walks the threshold table x[] until
   it reaches an entry that is not smaller than eb; the final ~0ul entry
   guarantees the scan stops for every eb.
   NOTE(review): the return type, the declaration of k, the loop body and
   the return statement are not visible in this listing, so the exact value
   returned for a given k cannot be confirmed from here.  */
123 win_size (unsigned long eb)
126 static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
127 for (k = 0; eb > x[k]; k++)
/* MPN_REDC_X (rp, tp, mp, n, mip): dispatch a REDC step to mpn_redc_1,
   mpn_redc_2 or mpn_redc_n.  The selector redc_x is not a macro parameter,
   so a variable of that name must be in scope wherever the macro is used.
   mpn_redc_1 is passed only the low inverse limb mip[0]; the other two
   routines are passed the mip pointer itself.
   NOTE(review): the first test (presumably redc_x == 1), the final `else'
   and any statement wrapper around the chain are not visible in this
   listing.  */
132 #define MPN_REDC_X(rp, tp, mp, n, mip) \
135 mpn_redc_1 (rp, tp, mp, n, mip[0]); \
136 else if (redc_x == 2) \
137 mpn_redc_2 (rp, tp, mp, n, mip); \
139 mpn_redc_n (rp, tp, mp, n, mip); \
142 /* Convert U to REDC form, U_r = B^n * U mod M */
/* Parameters: rp receives the n-limb result; {up, un} is U; {mp, n} is M.
   Method: U is copied to the high un limbs of a (un + n)-limb temporary,
   which is then divided by M with mpn_tdiv_qr; the remainder lands in rp
   and the quotient in qp is not used afterwards in the visible code.
   NOTE(review): for the temporary to equal U * B^n its low n limbs must be
   zero; that zeroing, the local declarations and the braces are not
   visible in this listing -- confirm against the complete file.  */
144 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
150 tp = TMP_ALLOC_LIMBS (un + n);
151 qp = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at tp+? */
/* Place U above the low n limbs, i.e. scaled by B^n.  */
154 MPN_COPY (tp + n, up, un);
155 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
159 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
160 Requires that mp[n-1..0] is odd.
161 Requires that ep[en-1..0] is > 1.
162 Uses scratch space tp[3n..0], i.e., 3n+1 words. */
/* Outline of the visible code: count the exponent bits, choose a window
   size, compute the inverse limb(s) needed by REDC, convert the base to
   REDC form, precompute a table of odd powers of the base, run a
   sliding-window exponentiation in which every square and multiply is
   followed by a REDC step, and finally convert the result out of REDC form.
   NOTE(review): this listing has lost many lines (braces, most local
   declarations, several if/else and preprocessor lines, loop headers and
   index updates); the comments added below describe only what is visible
   and should be confirmed against the complete file.  */
164 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
165 mp_srcptr ep, mp_size_t en,
166 mp_srcptr mp, mp_size_t n, mp_ptr tp)
171 int windowsize, this_windowsize;
173 mp_ptr pp, this_pp, last_pp;
/* Preconditions: exponent > 1, and a non-empty odd modulus.  */
179 ASSERT (en > 1 || (en == 1 && ep[0] > 1));
180 ASSERT (n >= 1 && ((mp[0] & 1) != 0));
/* ebi = number of significant exponent bits: all limb bits minus the
   leading zeros of the most significant exponent limb.  */
184 count_leading_zeros (cnt, ep[en - 1]);
185 ebi = en * GMP_LIMB_BITS - cnt;
/* NOTE(review): the mpn_mul call in the fragment below has a literal `...'
   argument, which is not valid C; the fragment is presumably compiled out
   in the complete file (no guarding directive is visible here) -- confirm.  */
190 /* Do the first few exponent bits without mod reductions,
191 until the result is greater than the mod argument. */
194 mpn_sqr_n (tp, this_pp, tn);
195 tn = tn * 2 - 1, tn += tp[tn] != 0;
196 if (getbit (ep, ebi) != 0)
197 mpn_mul (..., tp, tn, bp, bn);
/* Window width is chosen from the exponent bit count.  */
203 windowsize = win_size (ebi);
/* Compute the inverse mip handed to the REDC routines.  Which form is
   computed depends on n versus REDC_2_THRESHOLD and on
   HAVE_NATIVE_mpn_addmul_2: one limb via binvert_limb, two limbs via
   mpn_binvert, or n limbs via mpn_binvert.  */
205 if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
207 binvert_limb (mip[0], mp[0]);
211 #if defined (HAVE_NATIVE_mpn_addmul_2)
214 mpn_binvert (mip, mp, 2, tp);
/* Negate the two-limb value by negating the low limb and complementing the
   high limb.  NOTE(review): that equals a true two-limb negation only when
   mip[0] is nonzero, which presumably holds because mp is odd -- confirm.  */
215 mip[0] = -mip[0]; mip[1] = ~mip[1];
220 mpn_binvert (mip, mp, n, tp);
/* Table space: 2^(windowsize-1) entries of n limbs each.  */
224 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
/* Base converted to REDC form (this_pp presumably points at the first
   table entry; its assignment is not visible).  */
227 redcify (this_pp, bp, bn, mp, n);
231 /* Store b^2 in b2. */
232 mpn_sqr_n (tp, this_pp, n);
233 MPN_REDC_X (b2p, tp, mp, n, mip);
235 /* Precompute odd powers of b and put them in the temporary area at pp. */
236 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
/* Each new entry is the previous one times b^2, followed by REDC.  */
240 mpn_mul_n (tp, last_pp, b2p, n);
241 MPN_REDC_X (this_pp, tp, mp, n, mip);
/* First window: take the top exponent bits and seed rp from table entry
   expbits >> 1.  The use made of cnt (the trailing-zero count) is not
   visible in this listing.  */
244 expbits = getbits (ep, ebi, windowsize);
249 count_trailing_zeros (cnt, expbits);
253 MPN_COPY (rp, pp + n * (expbits >> 1), n);
/* A zero exponent bit costs one squaring plus REDC.  */
257 while (getbit (ep, ebi) == 0)
259 mpn_sqr_n (tp, rp, n);
260 MPN_REDC_X (rp, tp, mp, n, mip);
266 /* The next bit of the exponent is 1. Now extract the largest block of
267 bits <= windowsize, and such that the least significant bit is 1. */
269 expbits = getbits (ep, ebi, windowsize);
271 this_windowsize = windowsize;
274 this_windowsize += ebi;
/* Shorten the window by its trailing zero bits so that it ends in a 1.  */
278 count_trailing_zeros (cnt, expbits);
279 this_windowsize -= cnt;
/* Squaring plus REDC, repeated until this_windowsize reaches 0 (the loop
   header and the counter update are not visible).  */
285 mpn_sqr_n (tp, rp, n);
286 MPN_REDC_X (rp, tp, mp, n, mip);
289 while (this_windowsize != 0);
/* Multiply by the table entry selected by the window value, then REDC.  */
291 mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n);
292 MPN_REDC_X (rp, tp, mp, n, mip);
/* Convert out of REDC form: apply REDC to rp extended with n zero high
   limbs, then subtract mp once if the result is not below mp.  */
296 MPN_COPY (tp, rp, n);
297 MPN_ZERO (tp + n, n);
298 MPN_REDC_X (rp, tp, mp, n, mip);
299 if (mpn_cmp (rp, mp, n) >= 0)
300 mpn_sub_n (rp, rp, mp, n);