contrib/mpfr/pow_z.c

   1 /* mpfr_pow_z -- power function x^z with z a MPZ
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 2.1 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  21 MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* y <- x^|z| with z != 0
  27    if cr=1: ensures correct rounding of y
  28    if cr=0: does not ensure correct rounding, but avoid spurious overflow
  29    or underflow, and uses the precision of y as working precision (warning,
  30    y and x might be the same variable). */
  31 static int
  32 mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mp_rnd_t rnd, int cr)
  33 {
  34   mpfr_t res;
  35   mp_prec_t prec, err;
  36   int inexact;
  37   mp_rnd_t rnd1, rnd2;
  38   mpz_t absz;
  39   mp_size_t size_z;
  40   MPFR_ZIV_DECL (loop);
  41   MPFR_BLOCK_DECL (flags);
  42
  43   MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d cr=%d", x, x, rnd, cr),
  44                  ("y[%#R]=%R inexact=%d", y, y, inexact));
  45
  46   MPFR_ASSERTD (mpz_sgn (z) != 0);
  47
  48   if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
  49     return mpfr_set (y, x, rnd);
  50
  51   absz[0] = z[0];
  52   SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */
  53   MPFR_MPZ_SIZEINBASE2 (size_z, z);
  54
  55   /* round toward 1 (or -1) to avoid spurious overflow or underflow,
  56      i.e. if an overflow or underflow occurs, it is a real exception
  57      and is not just due to the rounding error. */
  58   rnd1 = (MPFR_EXP(x) >= 1) ? GMP_RNDZ
  59     : (MPFR_IS_POS(x) ? GMP_RNDU : GMP_RNDD);
  60   rnd2 = (MPFR_EXP(x) >= 1) ? GMP_RNDD : GMP_RNDU;
  61
  62   if (cr != 0)
  63     prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
  64   else
  65     prec = MPFR_PREC (y);
  66   mpfr_init2 (res, prec);
  67
  68   MPFR_ZIV_INIT (loop, prec);
  69   for (;;)
  70     {
  71       unsigned int inexmul;  /* will be non-zero if res may be inexact */
  72       mp_size_t i = size_z;
  73
  74       /* now 2^(i-1) <= z < 2^i */
  75       /* see below (case z < 0) for the error analysis, which is identical,
  76          except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
  77          instead of 2(2n-1)2^(-prec) for z<0. */
  78       MPFR_ASSERTD (prec > (mpfr_prec_t) i);
  79       err = prec - 1 - (mpfr_prec_t) i;
  80
  81       MPFR_BLOCK (flags,
  82                   inexmul = mpfr_mul (res, x, x, rnd2);
  83                   MPFR_ASSERTD (i >= 2);
  84                   if (mpz_tstbit (absz, i - 2))
  85                     inexmul |= mpfr_mul (res, res, x, rnd1);
  86                   for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
  87                     {
  88                       inexmul |= mpfr_mul (res, res, res, rnd2);
  89                       if (mpz_tstbit (absz, i))
  90                         inexmul |= mpfr_mul (res, res, x, rnd1);
  91                     });
  92       if (MPFR_LIKELY (inexmul == 0 || cr == 0
  93                        || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
  94                        || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
  95         break;
  96       /* Can't decide correct rounding, increase the precision */
  97       MPFR_ZIV_NEXT (loop, prec);
  98       mpfr_set_prec (res, prec);
  99     }
 100   MPFR_ZIV_FREE (loop);
 101
 102   /* Check Overflow */
 103   if (MPFR_OVERFLOW (flags))
 104     {
 105       MPFR_LOG_MSG (("overflow\n", 0));
 106       inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
 107                                MPFR_SIGN (x) : MPFR_SIGN_POS);
 108     }
 109   /* Check Underflow */
 110   else if (MPFR_UNDERFLOW (flags))
 111     {
 112       MPFR_LOG_MSG (("underflow\n", 0));
 113       if (rnd == GMP_RNDN)
 114         {
 115           mpfr_t y2, zz;
 116
 117           /* We cannot decide now whether the result should be rounded
 118              toward zero or +Inf. So, let's use the general case of
 119              mpfr_pow, which can do that. But the problem is that the
 120              result can be exact! However, it is sufficient to try to
 121              round on 2 bits (the precision does not matter in case of
 122              underflow, since MPFR does not have subnormals), in which
 123              case, the result cannot be exact due to previous filtering
 124              of trivial cases. */
 125           MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 126                                           MPFR_EXP (x) - 1) != 0);
 127           mpfr_init2 (y2, 2);
 128           mpfr_init2 (zz, ABS (SIZ (z)) * BITS_PER_MP_LIMB);
 129           inexact = mpfr_set_z (zz, z, GMP_RNDN);
 130           MPFR_ASSERTN (inexact == 0);
 131           inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
 132                                       (mpfr_save_expo_t *) NULL);
 133           mpfr_clear (zz);
 134           mpfr_set (y, y2, GMP_RNDN);
 135           mpfr_clear (y2);
 136           __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
 137         }
 138       else
 139         {
 140           inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
 141                                     MPFR_SIGN (x) : MPFR_SIGN_POS);
 142         }
 143     }
 144   else
 145     inexact = mpfr_set (y, res, rnd);
 146
 147   mpfr_clear (res);
 148   return inexact;
 149 }
 150
 151 /* The computation of y = pow(x,z) is done by
 152  *    y = set_ui(1)      if z = 0
 153  *    y = pow_ui(x,z)    if z > 0
 154  *    y = pow_ui(1/x,-z) if z < 0
 155  *
 156  * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
 157  * case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
 158  * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
 159  * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
 160  * x^z is representable.
 161  */
 162
 163 int
 164 mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mp_rnd_t rnd)
 165 {
 166   int   inexact;
 167   mpz_t tmp;
 168   MPFR_SAVE_EXPO_DECL (expo);
 169
 170   MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d", x, x, rnd),
 171                  ("y[%#R]=%R inexact=%d", y, y, inexact));
 172
 173   /* x^0 = 1 for any x, even a NaN */
 174   if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
 175     return mpfr_set_ui (y, 1, rnd);
 176
 177   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 178     {
 179       if (MPFR_IS_NAN (x))
 180         {
 181           MPFR_SET_NAN (y);
 182           MPFR_RET_NAN;
 183         }
 184       else if (MPFR_IS_INF (x))
 185         {
 186           /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
 187           /* Inf ^(-n) = 0, sign = + if x>0 or z even */
 188           if (mpz_sgn (z) > 0)
 189             MPFR_SET_INF (y);
 190           else
 191             MPFR_SET_ZERO (y);
 192           if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
 193             MPFR_SET_NEG (y);
 194           else
 195             MPFR_SET_POS (y);
 196           MPFR_RET (0);
 197         }
 198       else /* x is zero */
 199         {
 200           MPFR_ASSERTD (MPFR_IS_ZERO(x));
 201           if (mpz_sgn (z) > 0)
 202             /* 0^n = +/-0 for any n */
 203             MPFR_SET_ZERO (y);
 204           else
 205             /* 0^(-n) if +/- INF */
 206             MPFR_SET_INF (y);
 207           if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
 208             MPFR_SET_POS (y);
 209           else
 210             MPFR_SET_NEG (y);
 211           MPFR_RET(0);
 212         }
 213     }
 214
 215   /* detect exact powers: x^-n is exact iff x is a power of 2
 216      Do it if n > 0 too as this is faster and this filtering is
 217      needed in case of underflow. */
 218   if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 219                                        MPFR_EXP (x) - 1) == 0))
 220     {
 221       mp_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
 222                                        variable */
 223
 224       MPFR_LOG_MSG (("x^n with x power of two\n", 0));
 225       mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
 226       MPFR_ASSERTD (MPFR_IS_FP (y));
 227       mpz_init (tmp);
 228       mpz_mul_si (tmp, z, expx - 1);
 229       MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
 230       mpz_add_ui (tmp, tmp, 1);
 231       inexact = 0;
 232       if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
 233         {
 234           MPFR_LOG_MSG (("underflow\n", 0));
 235           /* |y| is a power of two, thus |y| <= 2^(emin-2), and in
 236              rounding to nearest, the value must be rounded to 0. */
 237           if (rnd == GMP_RNDN)
 238             rnd = GMP_RNDZ;
 239           inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
 240         }
 241       else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
 242         {
 243           MPFR_LOG_MSG (("overflow\n", 0));
 244           inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
 245         }
 246       else
 247         MPFR_SET_EXP (y, mpz_get_si (tmp));
 248       mpz_clear (tmp);
 249       MPFR_RET (inexact);
 250     }
 251
 252   MPFR_SAVE_EXPO_MARK (expo);
 253
 254   if (mpz_sgn (z) > 0)
 255     {
 256       inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
 257       MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 258     }
 259   else
 260     {
 261       /* Declaration of the intermediary variable */
 262       mpfr_t t;
 263       mp_prec_t Nt;   /* Precision of the intermediary variable */
 264       mp_rnd_t rnd1;
 265       mp_size_t size_z;
 266       MPFR_ZIV_DECL (loop);
 267
 268       MPFR_MPZ_SIZEINBASE2 (size_z, z);
 269
 270       /* initial working precision */
 271       Nt = MPFR_PREC (y);
 272       Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
 273       /* ensures Nt >= bits(z)+2 */
 274
 275       /* initialise of intermediary variable */
 276       mpfr_init2 (t, Nt);
 277
 278       /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
 279          toward sign(x), to avoid spurious overflow or underflow. */
 280       rnd1 = MPFR_EXP (x) < 1 ? GMP_RNDZ :
 281         (MPFR_SIGN (x) > 0 ? GMP_RNDU : GMP_RNDD);
 282
 283       MPFR_ZIV_INIT (loop, Nt);
 284       for (;;)
 285         {
 286           MPFR_BLOCK_DECL (flags);
 287
 288           /* compute (1/x)^(-z), -z>0 */
 289           /* As emin = -emax, an underflow cannot occur in the division.
 290              And if an overflow occurs, then this means that x^z overflows
 291              too (since we have rounded toward 1 or -1). */
 292           MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
 293           MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
 294           /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
 295           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
 296             goto overflow;
 297           MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
 298           /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
 299              with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
 300              which is satisfied as soon as Nt >= bits(z)+2, then we can use
 301              Lemma \ref{lemma_graillat} from algorithms.tex, which yields
 302              t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
 303              error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
 304           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
 305             {
 306             overflow:
 307               MPFR_ZIV_FREE (loop);
 308               mpfr_clear (t);
 309               MPFR_SAVE_EXPO_FREE (expo);
 310               MPFR_LOG_MSG (("overflow\n", 0));
 311               return mpfr_overflow (y, rnd,
 312                                     mpz_odd_p (z) ? MPFR_SIGN (x) :
 313                                     MPFR_SIGN_POS);
 314             }
 315           if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
 316             {
 317               MPFR_ZIV_FREE (loop);
 318               mpfr_clear (t);
 319               MPFR_LOG_MSG (("underflow\n", 0));
 320               if (rnd == GMP_RNDN)
 321                 {
 322                   mpfr_t y2, zz;
 323
 324                   /* We cannot decide now whether the result should be
 325                      rounded toward zero or away from zero. So, like
 326                      in mpfr_pow_pos_z, let's use the general case of
 327                      mpfr_pow in precision 2. */
 328                   MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 329                                                   MPFR_EXP (x) - 1) != 0);
 330                   mpfr_init2 (y2, 2);
 331                   mpfr_init2 (zz, ABS (SIZ (z)) * BITS_PER_MP_LIMB);
 332                   inexact = mpfr_set_z (zz, z, GMP_RNDN);
 333                   MPFR_ASSERTN (inexact == 0);
 334                   inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
 335                                               (mpfr_save_expo_t *) NULL);
 336                   mpfr_clear (zz);
 337                   mpfr_set (y, y2, GMP_RNDN);
 338                   mpfr_clear (y2);
 339                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
 340                   goto end;
 341                 }
 342               else
 343                 {
 344                   MPFR_SAVE_EXPO_FREE (expo);
 345                   return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
 346                                          MPFR_SIGN (x) : MPFR_SIGN_POS);
 347                 }
 348             }
 349           if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
 350                                            rnd)))
 351             break;
 352           /* actualisation of the precision */
 353           MPFR_ZIV_NEXT (loop, Nt);
 354           mpfr_set_prec (t, Nt);
 355         }
 356       MPFR_ZIV_FREE (loop);
 357
 358       inexact = mpfr_set (y, t, rnd);
 359       mpfr_clear (t);
 360     }
 361
 362  end:
 363   MPFR_SAVE_EXPO_FREE (expo);
 364   return mpfr_check_range (y, inexact, rnd);
 365 }