1 /* Tune various threshold of MPFR
3 Copyright 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
30 #define _PROTO __GMP_PROTO
35 /* s->size: precision of both input and output
36 s->xp : Mantissa of first input
37 s->yp : mantissa of second input */
39 #define SPEED_MPFR_FUNC(mean_fun) do { \
45 MPFR_TMP_DECL (marker); \
47 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
48 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
49 MPFR_TMP_MARK (marker); \
51 size = (s->size-1)/BITS_PER_MP_LIMB+1; \
52 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
53 MPFR_TMP_INIT1 (s->xp, x, s->size); \
54 MPFR_SET_EXP (x, 0); \
56 MPFR_TMP_INIT (wp, w, s->size, size); \
58 speed_operand_src (s, s->xp, size); \
59 speed_operand_dst (s, wp, size); \
60 speed_cache_fill (s); \
65 mean_fun (w, x, GMP_RNDN); \
67 t = speed_endtime (); \
69 MPFR_TMP_FREE (marker); \
73 #define SPEED_MPFR_OP(mean_fun) do { \
79 MPFR_TMP_DECL (marker); \
81 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
82 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
83 MPFR_TMP_MARK (marker); \
85 size = (s->size-1)/BITS_PER_MP_LIMB+1; \
86 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
87 MPFR_TMP_INIT1 (s->xp, x, s->size); \
88 MPFR_SET_EXP (x, 0); \
89 s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \
90 MPFR_TMP_INIT1 (s->yp, y, s->size); \
91 MPFR_SET_EXP (y, 0); \
93 MPFR_TMP_INIT (wp, w, s->size, size); \
95 speed_operand_src (s, s->xp, size); \
96 speed_operand_src (s, s->yp, size); \
97 speed_operand_dst (s, wp, size); \
98 speed_cache_fill (s); \
100 speed_starttime (); \
103 mean_fun (w, x, y, GMP_RNDN); \
105 t = speed_endtime (); \
107 MPFR_TMP_FREE (marker); \
112 /* First we include all the functions we want to tune inside this program.
113 We can't use GNU MPFR library since the THRESHOLD can't vary */
115 /* Setup mpfr_exp_2 */
116 mp_prec_t mpfr_exp_2_threshold;
117 #undef MPFR_EXP_2_THRESHOLD
118 #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
120 static double speed_mpfr_exp_2 (struct speed_params *s) {
121 SPEED_MPFR_FUNC (mpfr_exp_2);
125 mp_prec_t mpfr_exp_threshold;
126 #undef MPFR_EXP_THRESHOLD
127 #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
129 static double speed_mpfr_exp (struct speed_params *s) {
130 SPEED_MPFR_FUNC (mpfr_exp);
134 mp_prec_t mpfr_mul_threshold;
135 #undef MPFR_MUL_THRESHOLD
136 #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
138 static double speed_mpfr_mul (struct speed_params *s) {
139 SPEED_MPFR_OP (mpfr_mul);
144 /************************************************
145 * Common functions (inspired by GMP function) *
146 ************************************************/
148 analyze_data (double *dat, int ndat) {
153 for (j = 0; j < ndat; j++)
160 for (j = 0; j < ndat; x -= dat[j], j++)
171 #define THRESHOLD_WINDOW 16
172 #define THRESHOLD_FINAL_WINDOW 128
173 static double domeasure (mp_prec_t *threshold,
174 double (*func) (struct speed_params *),
177 struct speed_params s;
181 s.align_xp = s.align_yp = s.align_wp = 64;
183 size = (p - 1)/BITS_PER_MP_LIMB+1;
184 s.xp = malloc (2*size*sizeof (mp_limb_t));
187 fprintf (stderr, "Can't allocate memory.\n");
190 mpn_random (s.xp, size);
192 mpn_random (s.yp, size);
193 *threshold = MPFR_PREC_MAX;
194 t1 = speed_measure (func, &s);
197 fprintf (stderr, "Failed to measure function 1!\n");
201 t2 = speed_measure (func, &s);
204 fprintf (stderr, "Failed to measure function 2!\n");
208 /* t1 is the time of the first algo (used for low prec) */
213 /* d > 0 if we have to use algo 1.
214 d < 0 if we have to use algo 2 */
218 /* Tune a function with a simple THRESHOLD
219 The function doesn't depend on another threshold.
220 It assumes that it uses algo1 if p < THRESHOLD
222 if algo2 is better for low prec, and algo1 better for high prec,
223 the behaviour of this function is undefined. */
225 tune_simple_func (mp_prec_t *threshold,
226 double (*func) (struct speed_params *),
229 double measure[THRESHOLD_FINAL_WINDOW+1];
232 int i, numpos, numneg, try;
233 mp_prec_t pmin, pmax, p;
235 /* first look for a lower bound within 10% */
237 d = domeasure (threshold, func, pmin);
240 printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
241 (unsigned long) pmin);
242 *threshold = MPFR_PREC_MIN;
247 d = domeasure (threshold, func, pmin);
255 d = domeasure (threshold, func, pmin);
258 pmin += BITS_PER_MP_LIMB;
261 /* then look for a upper bound within 20% */
264 d = domeasure (threshold, func, pmax);
267 pmax += pmin / 2; /* don't increase too rapidly */
270 /* The threshold is between pmin and pmax. Affine them */
272 while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
274 pstep = MAX(MIN(BITS_PER_MP_LIMB/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
276 printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
277 p = (pmin + pmax) / 2;
278 for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) {
279 measure[i] = domeasure (threshold, func,
280 p+(i-THRESHOLD_WINDOW/2)*pstep);
283 else if (measure[i] < 0)
287 /* We use more often algo 1 than algo 2 */
288 pmin = p - THRESHOLD_WINDOW/2*pstep;
289 else if (numpos < numneg)
290 pmax = p + THRESHOLD_WINDOW/2*pstep;
292 /* numpos == numneg ... */
296 printf ("Quick find: %lu\n", *threshold);
303 printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
304 for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
305 measure[i] = domeasure (threshold, func, pmin+i);
306 i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
307 *threshold = pmin + i;
309 printf ("%lu\n", *threshold);
315 /************************************
316 * Tune Mulders' mulhigh function *
317 ************************************/
318 #define TOLERANCE 1.02
319 #ifndef MPFR_MULHIGH_SIZE
320 # define MPFR_MULHIGH_SIZE 1024
322 #ifndef MPFR_SQRHIGH_SIZE
323 # define MPFR_SQRHIGH_SIZE 1024
325 #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
326 #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
329 static double speed_mpfr_mulhigh (struct speed_params *s) {
330 SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
332 static double speed_mpfr_sqrhigh (struct speed_params *s) {
333 SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
336 #define MAX_STEPS 32 /* maximum number of values of k tried for a given n */
340 tune_mulders_upto (mp_size_t n)
342 struct speed_params s;
343 mp_size_t k, kbest, step;
345 MPFR_TMP_DECL (marker);
350 MPFR_TMP_MARK (marker);
351 s.align_xp = s.align_yp = s.align_wp = 64;
353 s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
354 s.yp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
355 mpn_random (s.xp, n);
356 mpn_random (s.yp, n);
358 /* Check k == -1, mpn_mul_basecase */
359 mulhigh_ktab[n] = -1;
361 tbest = speed_measure (speed_mpfr_mulhigh, &s);
363 /* Check k == 0, mpn_mulhigh_n_basecase */
365 t = speed_measure (speed_mpfr_mulhigh, &s);
366 if (t * TOLERANCE < tbest)
367 kbest = 0, tbest = t;
369 /* Check Mulders with cutoff point k */
370 step = 1 + n / (2 * MAX_STEPS);
371 for (k = n / 2 + 1 ; k < n ; k += step)
374 t = speed_measure (speed_mpfr_mulhigh, &s);
375 if (t * TOLERANCE < tbest)
376 kbest = k, tbest = t;
379 mulhigh_ktab[n] = kbest;
381 MPFR_TMP_FREE (marker);
387 tune_sqr_mulders_upto (mp_size_t n)
389 struct speed_params s;
390 mp_size_t k, kbest, step;
392 MPFR_TMP_DECL (marker);
397 MPFR_TMP_MARK (marker);
398 s.align_xp = s.align_wp = 64;
400 s.xp = MPFR_TMP_ALLOC (n*sizeof (mp_limb_t));
401 mpn_random (s.xp, n);
403 /* Check k == -1, mpn_sqr_basecase */
404 sqrhigh_ktab[n] = -1;
406 tbest = speed_measure (speed_mpfr_sqrhigh, &s);
408 /* Check k == 0, mpfr_mulhigh_n_basecase */
410 t = speed_measure (speed_mpfr_sqrhigh, &s);
411 if (t * TOLERANCE < tbest)
412 kbest = 0, tbest = t;
415 step = 1 + n / (2 * MAX_STEPS);
416 for (k = n / 2 + 1 ; k < n ; k += step)
419 t = speed_measure (speed_mpfr_sqrhigh, &s);
420 if (t * TOLERANCE < tbest)
421 kbest = k, tbest = t;
424 sqrhigh_ktab[n] = kbest;
426 MPFR_TMP_FREE (marker);
431 tune_mulders (FILE *f)
436 printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
437 fprintf (f, "#define MPFR_MULHIGH_TAB \\\n ");
438 for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
440 fprintf (f, "%d", (int) tune_mulders_upto (k));
441 if (k != MPFR_MULHIGH_TAB_SIZE-1)
444 fprintf (f, " \\\n ");
454 tune_sqr_mulders (FILE *f)
459 printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
460 fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n ");
461 for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
463 fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
464 if (k != MPFR_SQRHIGH_TAB_SIZE-1)
467 fprintf (f, " \\\n ");
476 /*******************************************************
477 * Tune all the threshold of MPFR *
478 * Warning: tune the function in their dependent order!*
479 *******************************************************/
481 all (const char *filename)
484 time_t start_time, end_time;
487 f = fopen (filename, "w");
489 fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
495 printf ("Using: %s\n", speed_time_string);
496 printf ("speed_precision %d", speed_precision);
497 if (speed_unittime == 1.0)
498 printf (", speed_unittime 1 cycle");
500 printf (", speed_unittime %.2e secs", speed_unittime);
501 if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
502 printf (", CPU freq unknown\n");
504 printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
508 tp = localtime (&start_time);
509 fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
510 tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
513 fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
514 #elif defined(__GNUC__)
515 #ifdef __GNUC_PATCHLEVEL__
516 fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
517 __GNUC_PATCHLEVEL__);
519 fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
521 #elif defined (__SUNPRO_C)
522 fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
523 #elif defined (__sgi) && defined (_COMPILER_VERSION)
524 fprintf (f, "MIPSpro C %d.%d.%d */\n",
525 _COMPILER_VERSION / 100,
526 _COMPILER_VERSION / 10 % 10,
527 _COMPILER_VERSION % 10);
528 #elif defined (__DECC) && defined (__DECC_VER)
529 fprintf (f, "DEC C %d */\n", __DECC_VER);
531 fprintf (f, "system compiler */\n");
539 tune_sqr_mulders (f);
541 /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
543 printf ("Tuning mpfr_mul...\n");
544 tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
545 2*BITS_PER_MP_LIMB+1);
546 fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
547 (unsigned long) (mpfr_mul_threshold - 1) / BITS_PER_MP_LIMB + 1);
549 /* Tune mpfr_exp_2 */
551 printf ("Tuning mpfr_exp_2...\n");
552 tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2,
554 mpfr_exp_2_threshold = MAX (BITS_PER_MP_LIMB, mpfr_exp_2_threshold);
555 fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
556 (unsigned long) mpfr_exp_2_threshold);
560 printf ("Tuning mpfr_exp...\n");
561 tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
562 MPFR_PREC_MIN+3*BITS_PER_MP_LIMB);
563 fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
564 (unsigned long) mpfr_exp_threshold);
568 fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
569 end_time - start_time);
571 printf ("Complete (took %ld seconds).\n", end_time - start_time);
578 int main (int argc, char *argv[])
580 /* Unbuffered so if output is redirected to a file it isn't lost if the
581 program is killed part way through. */
582 setbuf (stdout, NULL);
583 setbuf (stderr, NULL);
588 printf ("Tuning MPFR (Coffee time?)...\n");