1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
4 2004, 2005, 2008 Free Software Foundation, Inc.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 /* Uses the HGCD operation described in
27 N. Möller, On Schönhage's algorithm and subquadratic integer gcd
28 computation, Math. Comp. 77 (2008), 589-607.
30 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
31 then uses Lehmer's algorithm.
34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
35 * 2)/3, which gives a balanced multiplication in
36 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
37 * performance. The matrix-vector multiplication is then
38 * 4:1-unbalanced, with matrix elements of size n/6, and vector
39 * elements of size p = 2n/3. */
41 /* From analysis of the theoretical running time, it appears that when
42 * multiplication takes time O(n^alpha), p should be chosen so that
43 * the ratio of the time for the mpn_hgcd call, and the time for the
44 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
47 #define P_TABLE_SIZE 10000
48 mp_size_t p_table[P_TABLE_SIZE];
49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
51 #define CHOOSE_P(n) (2*(n) / 3)
55 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
59 mp_size_t matrix_scratch;
65 /* FIXME: Check for small sizes first, before setting up temporary
67 talloc = MPN_GCD_LEHMER_N_ITCH(n);
69 /* For initial division */
70 scratch = usize - n + 1;
77 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
80 mp_size_t hgcd_scratch;
81 mp_size_t update_scratch;
82 mp_size_t p = CHOOSE_P (n);
85 /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
87 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
88 hgcd_scratch = mpn_hgcd_itch (n);
89 update_scratch = 2*(n - 1);
91 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
92 hgcd_scratch = mpn_hgcd_itch (n - p);
93 update_scratch = p + n - 1;
95 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
101 tp = TMP_ALLOC_LIMBS(talloc);
105 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
107 if (mpn_zero_p (up, n))
109 MPN_COPY (gp, vp, n);
116 while (CHOOSE_P (n) > 0)
118 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
121 struct hgcd_matrix M;
122 mp_size_t p = CHOOSE_P (n);
123 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
125 mpn_hgcd_matrix_init (&M, n - p, tp);
126 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
129 ASSERT (M.n <= (n - p - 1)/2);
130 ASSERT (M.n + p <= (p + n - 1) / 2);
131 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
132 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
136 /* Temporary storage n */
137 n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
146 gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
158 compare_double(const void *ap, const void *bp)
160 double a = * (const double *) ap;
161 double b = * (const double *) bp;
172 median (double *v, size_t n)
174 qsort(v, n, sizeof(*v), compare_double);
179 #define TIME(res, code) do { \
180 double time_measurement[5]; \
183 for (time_i = 0; time_i < 5; time_i++) \
187 time_measurement[time_i] = speed_endtime(); \
189 res = median(time_measurement, 5); \
193 main(int argc, char *argv)
195 gmp_randstate_t rands;
205 /* Unbuffered so if output is redirected to a file it isn't lost if the
206 program is killed part way through. */
207 setbuf (stdout, NULL);
208 setbuf (stderr, NULL);
210 gmp_randinit_default (rands);
214 ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
215 bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
216 up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
217 vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
218 gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
219 tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
221 mpn_random (ap, P_TABLE_SIZE);
222 mpn_random (bp, P_TABLE_SIZE);
224 memset (p_table, 0, sizeof(p_table));
226 for (n = 100; n++; n < P_TABLE_SIZE)
241 MPN_COPY (up, ap, n);
242 MPN_COPY (vp, bp, n);
243 mpn_gcd_lehmer_n (gp, up, vp, n, tp);
246 best_time = lehmer_time;
249 for (p = n * 0.48; p < n * 0.77; p++)
256 MPN_COPY (up, ap, n);
257 MPN_COPY (vp, bp, n);
258 mpn_gcd (gp, up, n, vp, n);
267 printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
270 double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
271 printf(" %5.3g%%", speedup);
274 printf(" (ignored)");
283 gmp_randclear(rands);
286 #endif /* TUNE_GCD_P */