/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Integer greatest common divisor of two unsigned integers, using
   the accelerated algorithm (see reference below).

   mp_size_t mpn_gcd (gp, vp, vsize, up, usize).

   Preconditions [U = (up, usize) and V = (vp, vsize)]:

   1.  V is odd.
   2.  numbits(U) >= numbits(V).

   Both U and V are destroyed by the operation.  The result is left at gp,
   and its size is returned.

   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)

   Funding for this work has been partially provided by Conselho Nacional
   de Desenvolvimento Científico e Tecnológico (CNPq) do Brasil, Grant
   301314194-2, and was done while I was a visiting researcher in the
   Instituto de Matemática at Universidade Federal do Rio Grande do Sul
   (UFRGS).

   Reference:
       K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
       Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
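/* Illustrative sketch, not part of this file: one way a caller with odd U
   can establish precondition 1, since gcd (U, V) == gcd (U, V >> r) when
   U is odd.  Callers with both operands even must first strip the common
   power of two separately, as the mpz layer does.  vp, vsize are the
   caller's variables, and vp[0] is assumed nonzero so that
   count_trailing_zeros is well defined.  */
#if 0
  unsigned long int r;
  count_trailing_zeros (r, vp[0]);
  if (r != 0)
    {
      mpn_rshift (vp, vp, vsize, r);    /* V >>= r, making V odd.  */
      vsize -= (vp[vsize-1] == 0);
    }
#endif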
/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
   used, otherwise the binary algorithm is used.  This may be adjusted for
   different architectures.  */
#ifndef ACCEL_THRESHOLD
#define ACCEL_THRESHOLD 4
#endif
/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
   is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  */
enum
  {
    BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
  };
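/* For instance, with 32-bit limbs BMOD_THRESHOLD is 16: the accelerated
   loop below falls back to bmod reduction only when U is roughly 16 or
   more bits longer than V, and otherwise applies the k-ary step.  */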
#define SIGN_BIT  (~(~(mp_limb_t)0 >> 1))
#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)
/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  */
static __gmp_inline mp_size_t
gcd_2 (mp_ptr vp, mp_srcptr up)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];

  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0), u0 -= v0;
          count_trailing_zeros (r, u0);
          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0), v0 -= u0;
          count_trailing_zeros (r, v0);
          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
          v1 >>= r;
        }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}
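/* For intuition, the same subtract-and-shift step gcd_2 performs, written
   for a single limb (a sketch, not used by this file; both arguments must
   be odd).  mpn_gcd_1 plays this role for real single-limb operands.  */
#if 0
static mp_limb_t
gcd_1_sketch (mp_limb_t u, mp_limb_t v)
{
  while (u != v)
    {
      unsigned long int r;
      if (u < v)
        SWAP_LIMB (u, v);
      u -= v;                   /* Nonzero and even, since u, v are odd.  */
      count_trailing_zeros (r, u);
      u >>= r;                  /* Make u odd again.  */
    }
  return u;
}
#endif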
/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
   0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
   In the reference article, D was computed along with N, but it is better to
   compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
   the result as a two's complement signed integer.

   Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
   article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
   2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */
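/* Worked toy example of the N, D pair (added for illustration): with 4-bit
   limbs, so 2*BITS_PER_MP_LIMB = 8, take C = 91 (odd).  Then N = 6 and
   D = -14 satisfy N == D * C mod 2^8, since -14 * 91 = -1274 == 6 (mod 256),
   with 0 < N < 2^4 and 0 < |D| < 2^4 as required.  */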
static __gmp_inline mp_limb_t
find_a (mp_srcptr cp)
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];       /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = -n1_l;       /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
  mp_limb_t n2_h = ~n1_h;

  while (n2_h)                  /* While N2 >= 2^BITS_PER_MP_LIMB.  */
    {
      /* N1 <-- N1 % N2.  */
      if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
        {
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= leading_zero_bits, leading_zero_bits += i;
          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
          do
            {
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
              i -= 1;
            }
          while (i != 0);
        }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;

      SWAP_LIMB (n1_h, n2_h);
      SWAP_LIMB (n1_l, n2_l);
    }

  return n2_l;
}
mp_size_t
#if __STDC__
mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
#else
mpn_gcd (gp, vp, vsize, up, usize)
     mp_ptr gp;
     mp_ptr vp;
     mp_size_t vsize;
     mp_ptr up;
     mp_size_t usize;
#endif
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;           /* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  TMP_MARK (marker);
  /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for kary reduction.  */
  if (vsize > ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;
      count_leading_zeros (d, up[usize-1]);
      d = usize * BITS_PER_MP_LIMB - d;
      count_leading_zeros (vbitsize, vp[vsize-1]);
      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
      d = d - vbitsize + 1;
      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;                          /* Insert leading zero.  */
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
        up++, usize--;
      if (usize == 0)                           /* GCD == ORIG_V.  */
        goto done;

      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);
      do                                        /* Main loop.  */
        {
          /* up[0] is nonzero here, so negating the low limb and
             complementing the rest gives the exact two's complement.  */
          if (up[usize-1] & SIGN_BIT)           /* U < 0; take two's compl.  */
            {
              mp_size_t i;
              anchor_up[0] = -up[0];
              for (i = 1; i < usize; i++)
                anchor_up[i] = ~up[i];
              up = anchor_up;
            }

          MPN_NORMALIZE_NOT_ZERO (up, usize);
          if ((up[0] & 1) == 0)                 /* Result even; remove twos.  */
            {
              unsigned long int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (anchor_up, up, usize, r);
              usize -= (anchor_up[usize-1] == 0);
            }
          else if (anchor_up != up)
            MPN_COPY (anchor_up, up, usize);
          SWAP_MPN (anchor_up, usize, vp, vsize);
          up = anchor_up;

          if (vsize <= 2)               /* Kary can't handle < 2 limbs and  */
            break;                      /* isn't efficient for == 2 limbs.  */
          count_leading_zeros (d, up[usize-1]);
          d = usize * BITS_PER_MP_LIMB - d;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize + 1;
          if (d > BMOD_THRESHOLD)               /* Bmod reduction.  */
            {
              up[usize++] = 0;
              mpn_bdivmod (up, up, usize, vp, vsize, d);
              d /= BITS_PER_MP_LIMB, up += d, usize -= d;
            }
          else                                  /* Kary reduction.  */
            {
              mp_limb_t bp[2], cp[2];

              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
              cp[0] = vp[0], cp[1] = vp[1];
              mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);

              /* U <-- find_a (C) * U.  */
              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
              usize++;
              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
                 bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
                 bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2.  */
              bp[0] = up[0], bp[1] = up[1];
              mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
              bp[1] &= 1;       /* Since V is odd, division is unnecessary.  */
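              /* Added note: V odd means V == 1 (mod 2), so dividing by V
                 changes nothing mod 2; the low bit of bp[1] already is the
                 extra quotient bit described in the comment above.  */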
              if (bp[1])                /* B < 0: U <-- U + (-B) * V.  */
                {
                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
                }
              else                      /* B >= 0: U <-- U - B * V.  */
                {
                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
                }

              up += 2, usize -= 2;      /* At least two low limbs are zero.  */
            }
          /* Must remove low zero limbs before complementing.  */
          while (usize != 0 && up[0] == 0)
            up++, usize--;
        }
      while (usize);
      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;
  /* Finish up with the binary algorithm.  Executes once or twice.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)            /* First make U close to V in size.  */
        {
          unsigned long int vbitsize, d;
          count_leading_zeros (d, up[usize-1]);
          d = usize * BITS_PER_MP_LIMB - d;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize - 1;

          if (d != -(unsigned long int)1 && d > 2)
            {
              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
            }
        }
      /* Start binary GCD.  */
      do
        {
          mp_size_t zeros;

          /* Make sure U is odd.  */
          MPN_NORMALIZE (up, usize);
          while (up[0] == 0)
            up += 1, usize -= 1;
          if ((up[0] & 1) == 0)
            {
              unsigned long int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (up, up, usize, r);
              usize -= (up[usize-1] == 0);
            }

          /* Keep usize >= vsize.  */
          if (usize < vsize)
            SWAP_MPN (up, usize, vp, vsize);
          if (usize <= 2)                       /* Double precision.  */
            {
              if (vsize == 1)
                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
              else
                vsize = gcd_2 (vp, up);
              break;                            /* Binary GCD done.  */
            }
          /* Count number of low zero limbs of U - V.  */
          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
            continue;
          /* If U < V, swap U and V; in any case, subtract V from U.  */
          if (zeros == vsize)                   /* Subtract done.  */
            up += zeros, usize -= zeros;
          else if (usize == vsize)
            {
              mp_size_t size = vsize;
              do
                size--;
              while (up[size] == vp[size]);
              if (up[size] < vp[size])          /* usize == vsize.  */
                SWAP_PTR (up, vp);
              up += zeros, usize = size + 1 - zeros;
              mpn_sub_n (up, up, vp + zeros, usize);
            }
          else
            {
              mp_size_t size = vsize - zeros;
              up += zeros, usize -= zeros;
              if (mpn_sub_n (up, up, vp + zeros, size))
                {
                  while (up[size] == 0)         /* Propagate borrow.  */
                    up[size++] = -(mp_limb_t)1;
                  up[size] -= 1;
                }
            }
        }
      while (usize);                            /* End binary GCD.  */
    }
done:
  if (vp != gp)
    MPN_COPY (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}
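#if 0
/* Hypothetical standalone smoke test, not part of the library build:
   exercises mpn_gcd on single-limb operands that meet the preconditions
   (V odd, numbits(U) >= numbits(V)); both inputs are clobbered as
   documented.  Expected output: gcd = 231.  */
#include <stdio.h>

int
main (void)
{
  mp_limb_t u[1] = { 462 };     /* U = 2 * 3 * 7 * 11.  */
  mp_limb_t v[1] = { 231 };     /* V = 3 * 7 * 11, odd.  */
  mp_limb_t g[1];
  mp_size_t gsize = mpn_gcd (g, v, 1, u, 1);
  printf ("gcd = %lu (size %ld)\n", (unsigned long) g[0], (long) gsize);
  return 0;
}
#endif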