1 /* mpn_gcdext -- Extended Greatest Common Divisor.
3 Copyright (C) 1996 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Library General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 MA 02111-1307, USA. */
/* NOTE(review): debug instrumentation — a histogram indexed by the bit length
   of the largest Lehmer coefficient after each reduction step (see the
   arr[BITS_PER_MP_LIMB - cnt]++ update further down).  The index stays in
   bounds only because coefficients are limited to BITS_PER_MP_LIMB-1 bits
   (sign-bit room), making cnt >= 1.  File-scope mutable state: not
   thread-safe.  */
31 int arr[BITS_PER_MP_LIMB];

/* Three-way sign of A: -1 if negative, +1 if positive, 0 if zero.
   Function-like macro — A is evaluated twice, so callers must pass
   side-effect-free expressions.  */
34 #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
36 /* Idea 1: After we have performed a full division, don't shift operands back,
37 but instead account for the extra factors-of-2 thus introduced.
38 Idea 2: Simple generalization to use divide-and-conquer would give us an
39 algorithm that runs faster than O(n^2).
40 Idea 3: The input numbers need less space as the computation progresses,
41 while the s0 and s1 variables need more space. To save space, we
42 could make them share space, and have the latter variables grow
into the former.  */
45 /* Precondition: U >= V. */
/* mpn_gcdext -- extended GCD via Lehmer's single-limb method: a 2x2 matrix
   (A,B; C,D) of signed single-limb cofactors is accumulated from the top
   limbs of U and V, then applied both to the operands and to the cofactor
   accumulators s0/s1.  Destroys {up,size} and {vp,vsize}; the gcd is copied
   to gp and the first cofactor to the caller's s0p.
   NOTE(review): this listing is an incomplete excerpt (many lines elided,
   including the #if __STDC__ conditionals around the signature variants and
   the function tail), so the annotations below are hedged where context is
   missing.  Precondition per the original source: U >= V.  */
50 mpn_gcdext (mp_ptr gp, mp_ptr s0p,
51 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
/* K&R-style (pre-ANSI) parameter list variant of the same entry point;
   the surrounding preprocessor conditionals were elided from this excerpt.  */
53 mpn_gcdext (gp, s0p, up, size, vp, vsize)
64 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
/* Plain-gcd entry point variant (no cofactors); same elided conditionals.  */
66 mpn_gcd (gp, up, size, vp, vsize)
/* Lehmer cofactor matrix entries: signed, single limb each.  */
76 mp_limb_signed_t A, B, C, D;
/* Debug statistics on coefficient magnitudes; printed near the end.  */
80 mp_limb_signed_t min = 0, max = 0;
/* Remember the caller's cofactor buffer and size: s0p/size are swapped and
   shrunk during the computation, but results must land in the originals.  */
84 mp_ptr orig_s0p = s0p;
85 mp_size_t ssize, orig_size = size;
/* Scratch from the TMP allocator: tp/wp hold matrix-application products
   (one extra limb for carries), s1p holds the second cofactor accumulator.  */
90 tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
91 wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
92 s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
/* Initial full reduction U mod V: mpn_divmod needs a normalized divisor,
   so shift V (and U by the same amount), divide, then shift back and swap
   roles so the invariant U >= V is restored.  */
104 /* Normalize V (and shift up U the same amount). */
105 count_leading_zeros (cnt, vp[vsize - 1]);
109 mpn_lshift (vp, vp, vsize, cnt);
110 cy = mpn_lshift (up, up, size, cnt);
115 mpn_divmod (up + vsize, up, size, vp, vsize);
117 /* This is really what it boils down to in this case... */
124 mpn_rshift (up, up, size, cnt);
125 mpn_rshift (vp, vp, size, cnt);
129 xp = up; up = vp; vp = xp;
135 /* Figure out exact size of V. */
137 MPN_NORMALIZE (vp, vsize);
/* Main loop head (elided in part): uh/vh are single-limb windows on the
   most significant bits of U and the corresponding bits of V.  */
141 /* Make UH be the most significant limb of U, and make VH be
142 corresponding bits from V. */
145 count_leading_zeros (cnt, uh);
148 uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
149 vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
153 /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes
154 room for sign bit. */
/* Inner Lehmer loop: iterate single-limb division steps while the quotient
   is provably exact; stop when a denominator would hit zero or when the
   two quotient bounds (uh+A)/(vh+C) and (uh+B)/(vh+D) disagree.  */
165 mp_limb_signed_t q, T;
166 if (vh + C == 0 || vh + D == 0)
169 q = (uh + A) / (vh + C);
170 if (q != (uh + B) / (vh + D))
/* Debug statistics: track extreme coefficient values seen.  */
185 min = MIN (A, min); min = MIN (B, min);
186 min = MIN (C, min); min = MIN (D, min);
187 max = MAX (A, max); max = MAX (B, max);
188 max = MAX (C, max); max = MAX (D, max);
/* Fallback when the single-limb step made no progress: perform one full
   multi-precision division and update the cofactors with the quotient limbs
   via addmul, exactly as a schoolbook extended-Euclid step would.  */
196 /* This is quite rare. I.e., optimize something else! */
198 /* Normalize V (and shift up U the same amount). */
199 count_leading_zeros (cnt, vp[vsize - 1]);
203 mpn_lshift (vp, vp, vsize, cnt);
204 cy = mpn_lshift (up, up, size, cnt);
209 qh = mpn_divmod (up + vsize, up, size, vp, vsize);
/* tp = s0 + q * s1, built limb-by-limb from the quotient at up+vsize.  */
211 MPN_COPY (tp, s0p, ssize);
212 for (i = 0; i < size - vsize; i++)
215 cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
223 /* XXX since qh == 1, mpn_addmul_1 is overkill */
224 cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
/* Rotate (s0, s1) <- (s1, s0 + q*s1); the copy form and the pointer-swap
   form are alternatives from elided #if branches.  */
229 MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
230 MPN_COPY (s1p, tp, ssize);
234 xp = s0p; s0p = s1p; s1p = xp;
235 xp = s1p; s1p = tp; tp = xp;
242 mpn_rshift (up, up, size, cnt);
243 mpn_rshift (vp, vp, size, cnt);
248 xp = up; up = vp; vp = xp;
250 MPN_NORMALIZE (up, size);
/* Lehmer step succeeded: sanity-check the matrix (rows must have entries of
   opposite sign) and record the coefficient bit length (debug histogram).  */
259 if (SGN(A) == SGN(B)) /* should be different sign */
261 if (SGN(C) == SGN(D)) /* should be different sign */
265 x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
266 count_leading_zeros (cnt, x);
267 arr[BITS_PER_MP_LIMB - cnt]++; }
/* Apply the matrix to (U, V): new values B*V + A*U and D*V + C*U.  One entry
   in each pair is negative, so the negated entry (-A, -B, -C, -D) is passed
   to submul as a nonnegative limb.  A == 0 degenerates to a plain copy, in
   which case B must be 1.  */
271 if (B != 1) abort ();
272 MPN_COPY (tp, vp, size);
278 mpn_mul_1 (tp, vp, size, B);
279 mpn_submul_1 (tp, up, size, -A);
283 mpn_mul_1 (tp, up, size, A);
284 mpn_submul_1 (tp, vp, size, -B);
289 mpn_mul_1 (wp, vp, size, D);
290 mpn_submul_1 (wp, up, size, -C);
294 mpn_mul_1 (wp, up, size, C);
295 mpn_submul_1 (wp, vp, size, -D);
300 xp = tp; tp = up; up = xp;
301 xp = wp; wp = vp; vp = xp;
/* Apply the same matrix to the cofactor pair (s0, s1); here the signs
   cooperate so mul_1 plus addmul_1 accumulate without borrow handling.  */
306 MPN_ZERO (tp, orig_size);
309 if (B != 1) abort ();
310 MPN_COPY (tp, s1p, ssize);
316 cy = mpn_mul_1 (tp, s1p, ssize, B);
317 cy += mpn_addmul_1 (tp, s0p, ssize, -A);
321 cy = mpn_mul_1 (tp, s0p, ssize, A);
322 cy += mpn_addmul_1 (tp, s1p, ssize, -B);
327 MPN_ZERO (wp, orig_size);
330 cy = mpn_mul_1 (wp, s1p, ssize, D);
331 cy += mpn_addmul_1 (wp, s0p, ssize, -C);
335 cy = mpn_mul_1 (wp, s0p, ssize, C);
336 cy += mpn_addmul_1 (wp, s1p, ssize, -D);
343 xp = tp; tp = s0p; s0p = xp;
344 xp = wp; wp = s1p; s1p = xp;
347 #if 0 /* Is it a win to remove multiple zeros here? */
348 MPN_NORMALIZE (up, size);
/* Loop exit: V exhausted (or reduced to one limb).  Debug dump of the
   coefficient statistics, then copy results to the caller's buffers.  */
350 if (up[size - 1] == 0)
357 printf ("min: %ld\n", min);
358 printf ("max: %ld\n", max);
364 MPN_COPY (gp, up, size);
367 MPN_COPY (orig_s0p, s0p, ssize);
/* Single-limb tail: once V fits in one limb (vl), finish the GCD with
   mpn_divmod_1 / mpn_mod_1, folding each quotient into the cofactors with
   the same rotate-(s0,s1) pattern as above.  */
381 t = mpn_divmod_1 (wp, up, size, vl);
382 MPN_COPY (tp, s0p, ssize);
383 for (i = 0; i < size; i++)
385 cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
390 MPN_COPY (s0p, s1p, ssize);
391 MPN_COPY (s1p, tp, ssize);
395 xp = s0p; s0p = s1p; s1p = xp;
396 xp = s1p; s1p = tp; tp = xp;
400 t = mpn_mod_1 (up, size, vl);
412 MPN_COPY (tp, s0p, ssize);
413 cy = mpn_addmul_1 (tp, s1p, ssize, q);
417 MPN_COPY (s0p, s1p, ssize);
418 MPN_COPY (s1p, tp, ssize);
422 xp = s0p; s0p = s1p; s1p = xp;
423 xp = s1p; s1p = tp; tp = xp;
436 MPN_COPY (orig_s0p, s0p, ssize);
/* NOTE(review): function tail (return value, TMP cleanup, closing brace)
   is elided from this excerpt.  */