contrib/mpfr/src/exp_2.c

   1 /* mpfr_exp_2 -- exponential of a floating-point number
   2                  using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
   3
   4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   5 Contributed by the AriC and Caramel projects, INRIA.
   6
   7 This file is part of the GNU MPFR Library.
   8
   9 The GNU MPFR Library is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU Lesser General Public License as published by
  11 the Free Software Foundation; either version 3 of the License, or (at your
  12 option) any later version.
  13
  14 The GNU MPFR Library is distributed in the hope that it will be useful, but
  15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  17 License for more details.
  18
  19 You should have received a copy of the GNU Lesser General Public License
  20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  21 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  23
  24 /* #define DEBUG */
  25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
  26 #include "mpfr-impl.h"
  27
  28 static unsigned long
  29 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
  30 static unsigned long
  31 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
  32 static mpfr_exp_t
  33 mpz_normalize  (mpz_t, mpz_t, mpfr_exp_t);
  34 static mpfr_exp_t
  35 mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
  36
  37 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
  38    Otherwise do nothing and return 0.
  39  */
  40 static mpfr_exp_t
  41 mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
  42 {
  43   size_t k;
  44
  45   MPFR_MPZ_SIZEINBASE2 (k, z);
  46   MPFR_ASSERTD (k == (mpfr_uexp_t) k);
  47   if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
  48     {
  49       mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
  50       return (mpfr_exp_t) k - q;
  51     }
  52   if (MPFR_UNLIKELY(rop != z))
  53     mpz_set (rop, z);
  54   return 0;
  55 }
  56
  57 /* if expz > target, shift z by (expz-target) bits to the left.
  58    if expz < target, shift z by (target-expz) bits to the right.
  59    Returns target.
  60 */
  61 static mpfr_exp_t
  62 mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
  63 {
  64   if (target > expz)
  65     mpz_fdiv_q_2exp (rop, z, target - expz);
  66   else
  67     mpz_mul_2exp (rop, z, expz - target);
  68   return target;
  69 }
  70
  71 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
  72    where x = n*log(2)+(2^K)*r
  73    together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
  74    evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
  75    This function returns with the exact flags due to exp.
  76 */
  77 int
  78 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
  79 {
  80   long n;
  81   unsigned long K, k, l, err; /* FIXME: Which type ? */
  82   int error_r;
  83   mpfr_exp_t exps, expx;
  84   mpfr_prec_t q, precy;
  85   int inexact;
  86   mpfr_t r, s;
  87   mpz_t ss;
  88   MPFR_ZIV_DECL (loop);
  89
  90   MPFR_LOG_FUNC
  91     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
  92      ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
  93       inexact));
  94
  95   expx = MPFR_GET_EXP (x);
  96   precy = MPFR_PREC(y);
  97
  98   /* Warning: we cannot use the 'double' type here, since on 64-bit machines
  99      x may be as large as 2^62*log(2) without overflow, and then x/log(2)
 100      is about 2^62: not every integer of that size can be represented as a
 101      'double', thus the argument reduction would fail. */
 102   if (expx <= -2)
 103     /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
 104     n = 0;
 105   else
 106     {
 107       mpfr_init2 (r, sizeof (long) * CHAR_BIT);
 108       mpfr_const_log2 (r, MPFR_RNDZ);
 109       mpfr_div (r, x, r, MPFR_RNDN);
 110       n = mpfr_get_si (r, MPFR_RNDN);
 111       mpfr_clear (r);
 112     }
 113   /* we have |x| <= (|n|+1)*log(2) */
 114   MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
 115
 116   /* error_r bounds the cancelled bits in x - n*log(2) */
 117   if (MPFR_UNLIKELY (n == 0))
 118     error_r = 0;
 119   else
 120     {
 121       count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1);
 122       error_r = GMP_NUMB_BITS - error_r;
 123       /* we have |x| <= 2^error_r * log(2) */
 124     }
 125
 126   /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
 127      n/K terms costs about n/(2K) multiplications when computed in fixed
 128      point */
 129   K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
 130     : __gmpfr_cuberoot (4*precy);
 131   l = (precy - 1) / K + 1;
 132   err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
 133   /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
 134   q = precy + err + K + 8;
 135   /* if |x| >> 1, take into account the cancelled bits */
 136   if (expx > 0)
 137     q += expx;
 138
 139   /* Note: due to the mpfr_prec_round below, it is not possible to use
 140      the MPFR_GROUP_* macros here. */
 141
 142   mpfr_init2 (r, q + error_r);
 143   mpfr_init2 (s, q + error_r);
 144
 145   /* the algorithm consists in computing an upper bound of exp(x) using
 146      a precision of q bits, and see if we can round to MPFR_PREC(y) taking
 147      into account the maximal error. Otherwise we increase q. */
 148   MPFR_ZIV_INIT (loop, q);
 149   for (;;)
 150     {
 151       MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
 152                      n, K, l, (unsigned long) q, error_r));
 153
 154       /* First reduce the argument to r = x - n * log(2),
 155          so that r is small in absolute value. We want an upper
 156          bound on r to get an upper bound on exp(x). */
 157
 158       /* if n<0, we have to get an upper bound of log(2)
 159          in order to get an upper bound of r = x-n*log(2) */
 160       mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
 161       /* s is within 1 ulp(s) of log(2) */
 162
 163       mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
 164       /* r is within 3 ulps of |n|*log(2) */
 165       if (n < 0)
 166         MPFR_CHANGE_SIGN (r);
 167       /* r <= n*log(2), within 3 ulps */
 168
 169       MPFR_LOG_VAR (x);
 170       MPFR_LOG_VAR (r);
 171
 172       mpfr_sub (r, x, r, MPFR_RNDU);
 173
 174       if (MPFR_IS_PURE_FP (r))
 175         {
 176           while (MPFR_IS_NEG (r))
 177             { /* initial approximation n was too large */
 178               n--;
 179               mpfr_add (r, r, s, MPFR_RNDU);
 180             }
 181
 182           /* since there was a cancellation in x - n*log(2), the low error_r
 183              bits from r are zero and thus non significant, thus we can reduce
 184              the working precision */
 185           if (error_r > 0)
 186             mpfr_prec_round (r, q, MPFR_RNDU);
 187           /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
 188              and 1 + 3/2 if error_r > 0) */
 189           MPFR_LOG_VAR (r);
 190           MPFR_ASSERTD (MPFR_IS_POS (r));
 191           mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
 192
 193           mpz_init (ss);
 194           exps = mpfr_get_z_2exp (ss, s);
 195           /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
 196           MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
 197           l = (precy < MPFR_EXP_2_THRESHOLD)
 198             ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
 199             : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
 200
 201           MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
 202                          l, (unsigned long) q, (K + l) * (double) q * q));
 203
 204           for (k = 0; k < K; k++)
 205             {
 206               mpz_mul (ss, ss, ss);
 207               exps <<= 1;
 208               exps += mpz_normalize (ss, ss, q);
 209             }
 210           mpfr_set_z (s, ss, MPFR_RNDN);
 211
 212           MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
 213           mpz_clear (ss);
 214
 215           /* error is at most 2^K*l, plus 2 to take into account of
 216              the error of 3 ulps on r */
 217           err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
 218
 219           MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
 220           MPFR_LOG_VAR (s);
 221           MPFR_LOG_MSG (("err=%lu bits\n", K));
 222
 223           if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
 224             {
 225               mpfr_clear_flags ();
 226               inexact = mpfr_mul_2si (y, s, n, rnd_mode);
 227               break;
 228             }
 229         }
 230
 231       MPFR_ZIV_NEXT (loop, q);
 232       mpfr_set_prec (r, q + error_r);
 233       mpfr_set_prec (s, q + error_r);
 234     }
 235   MPFR_ZIV_FREE (loop);
 236
 237   mpfr_clear (r);
 238   mpfr_clear (s);
 239
 240   return inexact;
 241 }
 242
 243 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
 244    using naive method with O(l) multiplications.
 245    Return the number of iterations l.
 246    The absolute error on s is less than 3*l*(l+1)*2^(-q).
 247    Version using fixed-point arithmetic with mpz instead
 248    of mpfr for internal computations.
 249    NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ
 250    is no longer used (r6919); qn was the number of limbs of q.
 251    s must have at least qn+1 limbs (qn should be enough, but currently fails
 252    since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
 253 */
 254 static unsigned long
 255 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
 256 {
 257   unsigned long l;
 258   mpfr_exp_t dif, expt, expr;
 259   mpz_t t, rr;
 260   mp_size_t sbit, tbit;
 261
 262   MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
 263
 264   expt = 0;
 265   *exps = 1 - (mpfr_exp_t) q;                   /* s = 2^(q-1) */
 266   mpz_init (t);
 267   mpz_init (rr);
 268   mpz_set_ui(t, 1);
 269   mpz_set_ui(s, 1);
 270   mpz_mul_2exp(s, s, q-1);
 271   expr = mpfr_get_z_2exp(rr, r);               /* no error here */
 272
 273   l = 0;
 274   for (;;) {
 275     l++;
 276     mpz_mul(t, t, rr);
 277     expt += expr;
 278     MPFR_MPZ_SIZEINBASE2 (sbit, s);
 279     MPFR_MPZ_SIZEINBASE2 (tbit, t);
 280     dif = *exps + sbit - expt - tbit;
 281     /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
 282     expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */
 283     mpz_fdiv_q_ui (t, t, l);                   /* error at most 2^(1-q) */
 284     /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
 285     MPFR_ASSERTD (expt == *exps);
 286     if (mpz_sgn (t) == 0)
 287       break;
 288     mpz_add(s, s, t);                      /* no error here: exact */
 289     /* ensures rr has the same size as t: after several shifts, the error
 290        on rr is still at most ulp(t)=ulp(s) */
 291     MPFR_MPZ_SIZEINBASE2 (tbit, t);
 292     expr += mpz_normalize(rr, rr, tbit);
 293   }
 294
 295   mpz_clear (t);
 296   mpz_clear (rr);
 297
 298   return 3 * l * (l + 1);
 299 }
 300
 301 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
 302    using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
 303    Return l.
 304    Uses m multiplications of full size and 2l/m of decreasing size,
 305    i.e. a total equivalent to about m+l/m full multiplications,
 306    i.e. 2*sqrt(l) for m=sqrt(l).
 307    NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
 308    is no longer used (r6919); sizer was the number of limbs of r.
 309    Version using mpz. ss must have at least (sizer+1) limbs.
 310    The error is bounded by (l^2+4*l) ulps where l is the return value.
 311 */
 312 static unsigned long
 313 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
 314 {
 315   mpfr_exp_t expr, *expR, expt;
 316   mpfr_prec_t ql;
 317   unsigned long l, m, i;
 318   mpz_t t, *R, rr, tmp;
 319   mp_size_t sbit, rrbit;
 320   MPFR_TMP_DECL(marker);
 321
 322   /* estimate value of l */
 323   MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
 324   l = q / (- MPFR_GET_EXP (r));
 325   m = __gmpfr_isqrt (l);
 326   /* we access R[2], thus we need m >= 2 */
 327   if (m < 2)
 328     m = 2;
 329
 330   MPFR_TMP_MARK(marker);
 331   R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t));     /* R[i] is r^i */
 332   expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
 333   /* expR[i] is the exponent for R[i] */
 334   mpz_init (tmp);
 335   mpz_init (rr);
 336   mpz_init (t);
 337   mpz_set_ui (s, 0);
 338   *exps = 1 - q;                        /* 1 ulp = 2^(1-q) */
 339   for (i = 0 ; i <= m ; i++)
 340     mpz_init (R[i]);
 341   expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
 342   expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
 343   mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
 344   mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
 345   expR[2] = 1 - q;
 346   for (i = 3 ; i <= m ; i++)
 347     {
 348       if ((i & 1) == 1)
 349         mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
 350       else
 351         mpz_mul (t, R[i/2], R[i/2]);
 352       mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
 353       expR[i] = 1 - q;
 354     }
 355   mpz_set_ui (R[0], 1);
 356   mpz_mul_2exp (R[0], R[0], q-1);
 357   expR[0] = 1-q; /* R[0]=1 */
 358   mpz_set_ui (rr, 1);
 359   expr = 0; /* rr contains r^l/l! */
 360   /* by induction: err(rr) <= 2*l ulps */
 361
 362   l = 0;
 363   ql = q; /* precision used for current giant step */
 364   do
 365     {
 366       /* all R[i] must have exponent 1-ql */
 367       if (l != 0)
 368         for (i = 0 ; i < m ; i++)
 369           expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
 370       /* the absolute error on R[i]*rr is still 2*i-1 ulps */
 371       expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
 372       /* err(t) <= 2*m-1 ulps */
 373       /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
 374          using Horner's scheme */
 375       for (i = m-1 ; i-- != 0 ; )
 376         {
 377           mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
 378           mpz_add (t, t, R[i]);
 379         }
 380       /* now err(t) <= (3m-2) ulps */
 381
 382       /* now multiplies t by r^l/l! and adds to s */
 383       mpz_mul (t, t, rr);
 384       expt += expr;
 385       expt = mpz_normalize2 (t, t, expt, *exps);
 386       /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
 387       MPFR_ASSERTD (expt == *exps);
 388       mpz_add (s, s, t); /* no error here */
 389
 390       /* updates rr, the multiplication of the factors l+i could be done
 391          using binary splitting too, but it is not sure it would save much */
 392       mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
 393       expr += expR[m];
 394       mpz_set_ui (tmp, 1);
 395       for (i = 1 ; i <= m ; i++)
 396         mpz_mul_ui (tmp, tmp, l + i);
 397       mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
 398       l += m;
 399       if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
 400         break;
 401       expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
 402       if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
 403         rrbit = 1;
 404       else
 405         MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
 406       MPFR_MPZ_SIZEINBASE2 (sbit, s);
 407       ql = q - *exps - sbit + expr + rrbit;
 408       /* TODO: Wrong cast. I don't want what is right, but this is
 409          certainly wrong */
 410     }
 411   while ((size_t) expr + rrbit > (size_t) -q);
 412
 413   for (i = 0 ; i <= m ; i++)
 414     mpz_clear (R[i]);
 415   MPFR_TMP_FREE(marker);
 416   mpz_clear (rr);
 417   mpz_clear (t);
 418   mpz_clear (tmp);
 419
 420   return l * (l + 4);
 421 }