contrib/gmp/mpn/generic/gcd.c

   1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
   2
   3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
   4 2004, 2005, 2008 Free Software Foundation, Inc.
   5
   6 This file is part of the GNU MP Library.
   7
   8 The GNU MP Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MP Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  20
  21 #include "gmp.h"
  22 #include "gmp-impl.h"
  23 #include "longlong.h"
  24
  25 /* Uses the HGCD operation described in
  26
  27      N. Möller, On Schönhage's algorithm and subquadratic integer gcd
  28      computation, Math. Comp. 77 (2008), 589-607.
  29
  30   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
  31   then uses Lehmer's algorithm.
  32 */
  33
  34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
  35  * 2)/3, which gives a balanced multiplication in
  36  * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
  37  * performance. The matrix-vector multiplication is then
  38  * 4:1-unbalanced, with matrix elements of size n/6, and vector
  39  * elements of size p = 2n/3. */
  40
  41 /* From analysis of the theoretical running time, it appears that when
  42  * multiplication takes time O(n^alpha), p should be chosen so that
  43  * the ratio of the time for the mpn_hgcd call, and the time for the
  44  * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
  45  * 1). */
  46 #ifdef TUNE_GCD_P
  47 #define P_TABLE_SIZE 10000
  48 mp_size_t p_table[P_TABLE_SIZE];
  49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
  50 #else
  51 #define CHOOSE_P(n) (2*(n) / 3)
  52 #endif
  53
  54 mp_size_t
  55 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
  56 {
  57   mp_size_t talloc;
  58   mp_size_t scratch;
  59   mp_size_t matrix_scratch;
  60
  61   mp_size_t gn;
  62   mp_ptr tp;
  63   TMP_DECL;
  64
  65   /* FIXME: Check for small sizes first, before setting up temporary
  66      storage etc. */
  67   talloc = MPN_GCD_LEHMER_N_ITCH(n);
  68
  69   /* For initial division */
  70   scratch = usize - n + 1;
  71   if (scratch > talloc)
  72     talloc = scratch;
  73
  74 #if TUNE_GCD_P
  75   if (CHOOSE_P (n) > 0)
  76 #else
  77   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
  78 #endif
  79     {
  80       mp_size_t hgcd_scratch;
  81       mp_size_t update_scratch;
  82       mp_size_t p = CHOOSE_P (n);
  83       mp_size_t scratch;
  84 #if TUNE_GCD_P
  85       /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
  86          is increasing */
  87       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
  88       hgcd_scratch = mpn_hgcd_itch (n);
  89       update_scratch = 2*(n - 1);
  90 #else
  91       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
  92       hgcd_scratch = mpn_hgcd_itch (n - p);
  93       update_scratch = p + n - 1;
  94 #endif
  95       scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
  96       if (scratch > talloc)
  97         talloc = scratch;
  98     }
  99
 100   TMP_MARK;
 101   tp = TMP_ALLOC_LIMBS(talloc);
 102
 103   if (usize > n)
 104     {
 105       mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
 106
 107       if (mpn_zero_p (up, n))
 108         {
 109           MPN_COPY (gp, vp, n);
 110           TMP_FREE;
 111           return n;
 112         }
 113     }
 114
 115 #if TUNE_GCD_P
 116   while (CHOOSE_P (n) > 0)
 117 #else
 118   while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
 119 #endif
 120     {
 121       struct hgcd_matrix M;
 122       mp_size_t p = CHOOSE_P (n);
 123       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
 124       mp_size_t nn;
 125       mpn_hgcd_matrix_init (&M, n - p, tp);
 126       nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
 127       if (nn > 0)
 128         {
 129           ASSERT (M.n <= (n - p - 1)/2);
 130           ASSERT (M.n + p <= (p + n - 1) / 2);
 131           /* Temporary storage 2 (p + M->n) <= p + n - 1. */
 132           n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
 133         }
 134       else
 135         {
 136           /* Temporary storage n */
 137           n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
 138           if (n == 0)
 139             {
 140               TMP_FREE;
 141               return gn;
 142             }
 143         }
 144     }
 145
 146   gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
 147   TMP_FREE;
 148   return gn;
 149 }
 150
 151 #ifdef TUNE_GCD_P
 152 #include <stdio.h>
 153 #include <string.h>
 154 #include <time.h>
 155 #include "speed.h"
 156
 157 static int
 158 compare_double(const void *ap, const void *bp)
 159 {
 160   double a = * (const double *) ap;
 161   double b = * (const double *) bp;
 162
 163   if (a < b)
 164     return -1;
 165   else if (a > b)
 166     return 1;
 167   else
 168     return 0;
 169 }
 170
 171 static double
 172 median (double *v, size_t n)
 173 {
 174   qsort(v, n, sizeof(*v), compare_double);
 175
 176   return v[n/2];
 177 }
 178
 179 #define TIME(res, code) do {                            \
 180   double time_measurement[5];                           \
 181   unsigned time_i;                                      \
 182                                                         \
 183   for (time_i = 0; time_i < 5; time_i++)                \
 184     {                                                   \
 185       speed_starttime();                                \
 186       code;                                             \
 187       time_measurement[time_i] = speed_endtime();       \
 188     }                                                   \
 189   res = median(time_measurement, 5);                    \
 190 } while (0)
 191
 192 int
 193 main(int argc, char *argv)
 194 {
 195   gmp_randstate_t rands;
 196   mp_size_t n;
 197   mp_ptr ap;
 198   mp_ptr bp;
 199   mp_ptr up;
 200   mp_ptr vp;
 201   mp_ptr gp;
 202   mp_ptr tp;
 203   TMP_DECL;
 204
 205   /* Unbuffered so if output is redirected to a file it isn't lost if the
 206      program is killed part way through.  */
 207   setbuf (stdout, NULL);
 208   setbuf (stderr, NULL);
 209
 210   gmp_randinit_default (rands);
 211
 212   TMP_MARK;
 213
 214   ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
 215   bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
 216   up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
 217   vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
 218   gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
 219   tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
 220
 221   mpn_random (ap, P_TABLE_SIZE);
 222   mpn_random (bp, P_TABLE_SIZE);
 223
 224   memset (p_table, 0, sizeof(p_table));
 225
 226   for (n = 100; n++; n < P_TABLE_SIZE)
 227     {
 228       mp_size_t p;
 229       mp_size_t best_p;
 230       double best_time;
 231       double lehmer_time;
 232
 233       if (ap[n-1] == 0)
 234         ap[n-1] = 1;
 235
 236       if (bp[n-1] == 0)
 237         bp[n-1] = 1;
 238
 239       p_table[n] = 0;
 240       TIME(lehmer_time, {
 241           MPN_COPY (up, ap, n);
 242           MPN_COPY (vp, bp, n);
 243           mpn_gcd_lehmer_n (gp, up, vp, n, tp);
 244         });
 245
 246       best_time = lehmer_time;
 247       best_p = 0;
 248
 249       for (p = n * 0.48; p < n * 0.77; p++)
 250         {
 251           double t;
 252
 253           p_table[n] = p;
 254
 255           TIME(t, {
 256               MPN_COPY (up, ap, n);
 257               MPN_COPY (vp, bp, n);
 258               mpn_gcd (gp, up, n, vp, n);
 259             });
 260
 261           if (t < best_time)
 262             {
 263               best_time = t;
 264               best_p = p;
 265             }
 266         }
 267       printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
 268       if (best_p > 0)
 269         {
 270           double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
 271           printf(" %5.3g%%", speedup);
 272           if (speedup < 1.0)
 273             {
 274               printf(" (ignored)");
 275               best_p = 0;
 276             }
 277         }
 278       printf("\n");
 279
 280       p_table[n] = best_p;
 281     }
 282   TMP_FREE;
 283   gmp_randclear(rands);
 284   return 0;
 285 }
 286 #endif /* TUNE_GCD_P */