1 /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2 store the truncated integer part at rootp and the remainder at remp.
4 Contributed by Paul Zimmermann (algorithm) and
5 Paul Zimmermann and Torbjorn Granlund (implementation).
7 THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S
8 ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
9 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11 Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
13 This file is part of the GNU MP Library.
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of the GNU Lesser General Public License as published by
17 the Free Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
20 The GNU MP Library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23 License for more details.
25 You should have received a copy of the GNU Lesser General Public License
26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
29 This implementation is not optimal when remp == NULL, since the complexity
30 is M(n), whereas it should be M(n/k) on average.
33 #include <stdio.h> /* for NULL */
39 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
/* MPN_RSHIFT(cy,rp,up,un,cnt): {rp,un} <- {up,un} >> cnt, with cy receiving
   the bits shifted out at the low end; when cnt == 0 a plain low-to-high
   copy is done instead, since mpn_rshift requires a non-zero shift count.
   NOTE(review): the do{}while(0) skeleton and the if/else on cnt are elided
   from this listing; presumably the else branch also sets cy = 0 -- confirm
   against the full source.  */
42 #define MPN_RSHIFT(cy,rp,up,un,cnt) \
45 cy = mpn_rshift (rp, up, un, cnt); \
48 MPN_COPY_INCR (rp, up, un); \
/* MPN_LSHIFT(cy,rp,up,un,cnt): {rp,un} <- {up,un} << cnt, with cy receiving
   the bits shifted out at the high end; when cnt == 0 a high-to-low copy
   (safe for rp >= up overlap) is done instead, since mpn_lshift requires a
   non-zero shift count.  NOTE(review): the do{}while(0) skeleton and the
   if/else on cnt are elided from this listing; presumably the else branch
   also sets cy = 0 -- confirm against the full source.  */
53 #define MPN_LSHIFT(cy,rp,up,un,cnt) \
56 cy = mpn_lshift (rp, up, un, cnt); \
59 MPN_COPY_DECR (rp, up, un); \
65 /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
66 If remp <> NULL, put in {remp, un} the remainder.
67 Return the size (in limbs) of the remainder if remp <> NULL,
68 or a non-zero value iff the remainder is non-zero when remp = NULL.
70 (a) up[un-1] is not zero
71 (b) rootp has at least space for ceil(un/k) limbs
72 (c) remp has at least space for un limbs (in case remp <> NULL)
73 (d) the operands do not overlap.
75 The auxiliary memory usage is 3*un+2 if remp = NULL,
76 and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment.
/* Public entry point: dispatches to mpn_rootrem_internal.  When no
   remainder is wanted and the root spans more than two limbs (un/k > 2),
   it recurses on a copy of U padded with k low zero limbs, so the
   approximate root has one extra low limb which can usually just be
   truncated away.  NOTE(review): this listing elides the return-type line,
   opening/closing braces, TMP_DECL/TMP_MARK/TMP_FREE bookkeeping, the
   computation of wn, and the sp[0] <= 1 fallback path (orig lines 108-110)
   -- consult the full source for those.  */
79 mpn_rootrem (mp_ptr rootp, mp_ptr remp,
80 mp_srcptr up, mp_size_t un, mp_limb_t k)
83 ASSERT (up[un - 1] != 0);
86 if ((remp == NULL) && (un / k > 2))
87 /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
88 which will produce an approximate root with one more limb,
89 so that in most cases we can conclude. */
96 wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
97 sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
98 sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
99 MPN_COPY (wp + k, up, un);
/* approx = 1: the exact remainder of the padded operand is not needed */
101 rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
102 /* the approximate root S = {sp,sn} is either the correct root of
103 {wp,wn}, or one too large. Thus unless the least significant limb
104 of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
105 one limb. (In case sp[0]=1, we can deduce the root, but not decide
106 whether it is exact or not.) */
/* drop the low (padding-induced) limb: root of {up,un} = S >> GMP_NUMB_BITS */
107 MPN_COPY (rootp, sp + 1, sn - 1);
111 else /* remp <> NULL */
113 return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
117 /* if approx is non-zero, does not compute the final remainder */
119 mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
120 mp_limb_t k, int approx)
122 mp_ptr qp, rp, sp, wp, scratch;
123 mp_size_t qn, rn, sn, wn, nl, bn;
124 mp_limb_t save, save2, cy;
125 unsigned long int unb; /* number of significant bits of {up,un} */
126 unsigned long int xnb; /* number of significant bits of the result */
129 unsigned long sizes[GMP_NUMB_BITS + 1];
137 /* qp and wp need enough space to store S'^k where S' is an approximate
138 root. Since S' can be as large as S+2, the worst case is when S=2 and
139 S'=4. But then since we know the number of bits of S in advance, S'
140 can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
141 So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
142 fits in un limbs, the number of extra limbs needed is bounded by
143 ceil(k*log2(3/2)/GMP_NUMB_BITS). */
144 #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
145 qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
146 of R/(k*S^(k-1)), and S^k */
149 rp = TMP_ALLOC_LIMBS (un + 1); /* will contain the remainder */
150 scratch = rp; /* used by mpn_div_q */
154 scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
158 wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
159 and temporary for mpn_pow_1 */
160 count_leading_zeros (cnt, up[un - 1]);
161 unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
162 /* unb is the number of bits of the input U */
164 xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
165 /* xnb is the number of bits of the root R */
167 if (xnb == 1) /* root is 1 */
171 mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
172 MPN_NORMALIZE (remp, un); /* There should be at most one zero limb,
173 if we demand u to be normalized */
179 /* We initialize the algorithm with a 1-bit approximation to zero: since we
180 know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
181 r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
182 kk = k * (xnb - 1); /* number of truncated bits in the input */
183 rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
184 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
185 mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since
186 the non-truncated part is less than 2^k, it
187 is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
188 sp[0] = 1; /* initial approximation */
189 sn = 1; /* it has one limb */
191 for (logk = 1; ((k - 1) >> logk) != 0; logk++)
193 /* logk = ceil(log(k)/log(2)) */
195 b = xnb - 1; /* number of remaining bits to determine in the kth root */
199 /* invariant: here we want b+1 total bits for the kth root */
201 /* if c is the new value of b, this means that we'll go from a root
202 of c+1 bits (say s') to a root of b+1 bits.
203 It is proved in the book "Modern Computer Arithmetic" from Brent
204 and Zimmermann, Chapter 1, that
205 if s' >= k*beta, then at most one correction is necessary.
206 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
207 c >= ceil((b + log2(k))/2). */
208 b = (b + logk + 1) / 2;
210 b = sizes[ni] - 1; /* add just one bit at a time */
214 ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
215 /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
216 sizes[i] <= 2 * sizes[i+1].
217 Newton iteration will first compute sizes[ni-1] extra bits,
218 then sizes[ni-2], ..., then sizes[0] = b. */
220 wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
222 for (i = ni; i != 0; i--)
224 /* 1: loop invariant:
225 {sp, sn} is the current approximation of the root, which has
226 exactly 1 + sizes[ni] bits.
227 {rp, rn} is the current remainder
228 {wp, wn} = {sp, sn}^(k-1)
229 kk = number of truncated bits of the input
231 b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
234 /* Reinsert a low zero limb if we normalized away the entire remainder */
241 /* first multiply the remainder by 2^b */
242 MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
243 rn = rn + b / GMP_NUMB_BITS;
252 /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
254 /* Now insert bits [kk,kk+b-1] from the input U */
255 bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
257 /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
258 nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
259 /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
260 - floor(kk / GMP_NUMB_BITS)
261 <= 1 + (kk + b - 1) / GMP_NUMB_BITS
262 - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
263 = 2 + (b - 2) / GMP_NUMB_BITS
264 thus since nl is an integer:
265 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
266 /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
269 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
270 /* set to zero high bits of rp[bn] */
271 rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
272 /* restore corresponding bits */
275 rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
276 they start by bit 0 in rp[0], so they use
277 at most ceil(b/GMP_NUMB_BITS) limbs */
279 /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
281 /* compute {wp, wn} = k * {sp, sn}^(k-1) */
282 cy = mpn_mul_1 (wp, wp, wn, k);
286 /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
288 /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
296 qn = rn - wn; /* expected quotient size */
297 /* tp must have space for wn limbs.
298 The quotient needs rn-wn+1 limbs, thus quotient+remainder
299 need altogether rn+1 limbs. */
300 tp = qp + qn + 1; /* put remainder in Q buffer */
301 mpn_div_q (qp, rp, rn, wp, wn, scratch);
305 /* 5: current buffers: {sp,sn}, {qp,qn}.
306 Note: {rp,rn} is not needed any more since we'll compute it from
307 scratch at the end of the loop.
310 /* Number of limbs used by b bits, when least significant bit is
311 aligned to least limb */
312 bn = (b - 1) / GMP_NUMB_BITS + 1;
314 /* the quotient should be smaller than 2^b, since the previous
315 approximation was correctly rounded toward zero */
316 if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
317 qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
319 qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
321 qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
322 MPN_DECR_U (qp, qn, 1);
323 qn -= qp[qn - 1] == 0;
326 /* 6: current buffers: {sp,sn}, {qp,qn} */
328 /* multiply the root approximation by 2^b */
329 MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
330 sn = sn + b / GMP_NUMB_BITS;
337 /* 7: current buffers: {sp,sn}, {qp,qn} */
339 ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
340 above, q is set to 2^b-1, which has
343 /* Combine sB and q to form sB + q. */
344 save = sp[b / GMP_NUMB_BITS];
345 MPN_COPY (sp, qp, qn);
346 MPN_ZERO (sp + qn, bn - qn);
347 sp[b / GMP_NUMB_BITS] |= save;
349 /* 8: current buffer: {sp,sn} */
351 /* Since each iteration treats b bits from the root and thus k*b bits
352 from the input, and we already considered b bits from the input,
353 we now have to take another (k-1)*b bits from the input. */
354 kk -= (k - 1) * b; /* remaining input bits */
355 /* {rp, rn} = floor({up, un} / 2^kk) */
356 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
357 rn = un - kk / GMP_NUMB_BITS;
358 rn -= rp[rn - 1] == 0;
360 /* 9: current buffers: {sp,sn}, {rp,rn} */
364 /* Compute S^k in {qp,qn}. */
367 /* Last iteration: we don't need W anymore. */
368 /* mpn_pow_1 requires that both qp and wp have enough space to
369 store the result {sp,sn}^k + 1 limb */
370 approx = approx && (sp[0] > 1);
371 qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
375 /* W <- S^(k-1) for the next iteration,
377 wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
378 mpn_mul (qp, wp, wn, sp, sn);
380 qn -= qp[qn - 1] == 0;
383 /* if S^k > floor(U/2^kk), the root approximation was too large */
384 if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
385 MPN_DECR_U (sp, sn, 1);
390 /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
392 ASSERT_ALWAYS (c <= 1);
393 ASSERT_ALWAYS (rn >= qn);
395 /* R = R - Q = floor(U/2^kk) - S^k */
396 if ((i > 1) || (approx == 0))
398 mpn_sub (rp, rp, rn, qp, qn);
399 MPN_NORMALIZE (rp, rn);
401 /* otherwise we have rn > 0, thus the return value is ok */
403 /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */