contrib/mpfr/src/pow_z.c

   1 /* mpfr_pow_z -- power function x^z with z a MPZ
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* y <- x^|z| with z != 0
  27    if cr=1: ensures correct rounding of y
  28    if cr=0: does not ensure correct rounding, but avoid spurious overflow
  29    or underflow, and uses the precision of y as working precision (warning,
  30    y and x might be the same variable). */
  31 static int
  32 mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr)
  33 {
  34   mpfr_t res;
  35   mpfr_prec_t prec, err;
  36   int inexact;
  37   mpfr_rnd_t rnd1, rnd2;
  38   mpz_t absz;
  39   mp_size_t size_z;
  40   MPFR_ZIV_DECL (loop);
  41   MPFR_BLOCK_DECL (flags);
  42
  43   MPFR_LOG_FUNC
  44     (("x[%Pu]=%.*Rg z=%Zd rnd=%d cr=%d",
  45       mpfr_get_prec (x), mpfr_log_prec, x, z, rnd, cr),
  46      ("y[%Pu]=%.*Rg inexact=%d",
  47       mpfr_get_prec (y), mpfr_log_prec, y, inexact));
  48
  49   MPFR_ASSERTD (mpz_sgn (z) != 0);
  50
  51   if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
  52     return mpfr_set (y, x, rnd);
  53
  54   absz[0] = z[0];
  55   SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */
  56   MPFR_MPZ_SIZEINBASE2 (size_z, z);
  57
  58   /* round toward 1 (or -1) to avoid spurious overflow or underflow,
  59      i.e. if an overflow or underflow occurs, it is a real exception
  60      and is not just due to the rounding error. */
  61   rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ
  62     : (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD);
  63   rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU;
  64
  65   if (cr != 0)
  66     prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
  67   else
  68     prec = MPFR_PREC (y);
  69   mpfr_init2 (res, prec);
  70
  71   MPFR_ZIV_INIT (loop, prec);
  72   for (;;)
  73     {
  74       unsigned int inexmul;  /* will be non-zero if res may be inexact */
  75       mp_size_t i = size_z;
  76
  77       /* now 2^(i-1) <= z < 2^i */
  78       /* see below (case z < 0) for the error analysis, which is identical,
  79          except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
  80          instead of 2(2n-1)2^(-prec) for z<0. */
  81       MPFR_ASSERTD (prec > (mpfr_prec_t) i);
  82       err = prec - 1 - (mpfr_prec_t) i;
  83
  84       MPFR_BLOCK (flags,
  85                   inexmul = mpfr_mul (res, x, x, rnd2);
  86                   MPFR_ASSERTD (i >= 2);
  87                   if (mpz_tstbit (absz, i - 2))
  88                     inexmul |= mpfr_mul (res, res, x, rnd1);
  89                   for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
  90                     {
  91                       inexmul |= mpfr_mul (res, res, res, rnd2);
  92                       if (mpz_tstbit (absz, i))
  93                         inexmul |= mpfr_mul (res, res, x, rnd1);
  94                     });
  95       if (MPFR_LIKELY (inexmul == 0 || cr == 0
  96                        || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
  97                        || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
  98         break;
  99       /* Can't decide correct rounding, increase the precision */
 100       MPFR_ZIV_NEXT (loop, prec);
 101       mpfr_set_prec (res, prec);
 102     }
 103   MPFR_ZIV_FREE (loop);
 104
 105   /* Check Overflow */
 106   if (MPFR_OVERFLOW (flags))
 107     {
 108       MPFR_LOG_MSG (("overflow\n", 0));
 109       inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
 110                                MPFR_SIGN (x) : MPFR_SIGN_POS);
 111     }
 112   /* Check Underflow */
 113   else if (MPFR_UNDERFLOW (flags))
 114     {
 115       MPFR_LOG_MSG (("underflow\n", 0));
 116       if (rnd == MPFR_RNDN)
 117         {
 118           mpfr_t y2, zz;
 119
 120           /* We cannot decide now whether the result should be rounded
 121              toward zero or +Inf. So, let's use the general case of
 122              mpfr_pow, which can do that. But the problem is that the
 123              result can be exact! However, it is sufficient to try to
 124              round on 2 bits (the precision does not matter in case of
 125              underflow, since MPFR does not have subnormals), in which
 126              case, the result cannot be exact due to previous filtering
 127              of trivial cases. */
 128           MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 129                                           MPFR_EXP (x) - 1) != 0);
 130           mpfr_init2 (y2, 2);
 131           mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
 132           inexact = mpfr_set_z (zz, z, MPFR_RNDN);
 133           MPFR_ASSERTN (inexact == 0);
 134           inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
 135                                       (mpfr_save_expo_t *) NULL);
 136           mpfr_clear (zz);
 137           mpfr_set (y, y2, MPFR_RNDN);
 138           mpfr_clear (y2);
 139           __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
 140         }
 141       else
 142         {
 143           inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
 144                                     MPFR_SIGN (x) : MPFR_SIGN_POS);
 145         }
 146     }
 147   else
 148     inexact = mpfr_set (y, res, rnd);
 149
 150   mpfr_clear (res);
 151   return inexact;
 152 }
 153
 154 /* The computation of y = pow(x,z) is done by
 155  *    y = set_ui(1)      if z = 0
 156  *    y = pow_ui(x,z)    if z > 0
 157  *    y = pow_ui(1/x,-z) if z < 0
 158  *
 159  * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
 160  * case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
 161  * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
 162  * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
 163  * x^z is representable.
 164  */
 165
 166 int
 167 mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd)
 168 {
 169   int   inexact;
 170   mpz_t tmp;
 171   MPFR_SAVE_EXPO_DECL (expo);
 172
 173   MPFR_LOG_FUNC
 174     (("x[%Pu]=%.*Rg z=%Zd rnd=%d",
 175       mpfr_get_prec (x), mpfr_log_prec, x, z, rnd),
 176      ("y[%Pu]=%.*Rg inexact=%d",
 177       mpfr_get_prec (y), mpfr_log_prec, y, inexact));
 178
 179   /* x^0 = 1 for any x, even a NaN */
 180   if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
 181     return mpfr_set_ui (y, 1, rnd);
 182
 183   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 184     {
 185       if (MPFR_IS_NAN (x))
 186         {
 187           MPFR_SET_NAN (y);
 188           MPFR_RET_NAN;
 189         }
 190       else if (MPFR_IS_INF (x))
 191         {
 192           /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
 193           /* Inf ^(-n) = 0, sign = + if x>0 or z even */
 194           if (mpz_sgn (z) > 0)
 195             MPFR_SET_INF (y);
 196           else
 197             MPFR_SET_ZERO (y);
 198           if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
 199             MPFR_SET_NEG (y);
 200           else
 201             MPFR_SET_POS (y);
 202           MPFR_RET (0);
 203         }
 204       else /* x is zero */
 205         {
 206           MPFR_ASSERTD (MPFR_IS_ZERO(x));
 207           if (mpz_sgn (z) > 0)
 208             /* 0^n = +/-0 for any n */
 209             MPFR_SET_ZERO (y);
 210           else
 211             {
 212               /* 0^(-n) if +/- INF */
 213               MPFR_SET_INF (y);
 214               mpfr_set_divby0 ();
 215             }
 216           if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
 217             MPFR_SET_POS (y);
 218           else
 219             MPFR_SET_NEG (y);
 220           MPFR_RET(0);
 221         }
 222     }
 223
 224   MPFR_SAVE_EXPO_MARK (expo);
 225
 226   /* detect exact powers: x^-n is exact iff x is a power of 2
 227      Do it if n > 0 too as this is faster and this filtering is
 228      needed in case of underflow. */
 229   if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 230                                        MPFR_EXP (x) - 1) == 0))
 231     {
 232       mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
 233                                          variable */
 234
 235       MPFR_LOG_MSG (("x^n with x power of two\n", 0));
 236       mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
 237       MPFR_ASSERTD (MPFR_IS_FP (y));
 238       mpz_init (tmp);
 239       mpz_mul_si (tmp, z, expx - 1);
 240       MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
 241       mpz_add_ui (tmp, tmp, 1);
 242       inexact = 0;
 243       if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
 244         {
 245           MPFR_LOG_MSG (("underflow\n", 0));
 246           /* |y| is a power of two, thus |y| <= 2^(emin-2), and in
 247              rounding to nearest, the value must be rounded to 0. */
 248           if (rnd == MPFR_RNDN)
 249             rnd = MPFR_RNDZ;
 250           inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
 251         }
 252       else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
 253         {
 254           MPFR_LOG_MSG (("overflow\n", 0));
 255           inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
 256         }
 257       else
 258         MPFR_SET_EXP (y, mpz_get_si (tmp));
 259       mpz_clear (tmp);
 260       MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 261     }
 262   else if (mpz_sgn (z) > 0)
 263     {
 264       inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
 265       MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 266     }
 267   else
 268     {
 269       /* Declaration of the intermediary variable */
 270       mpfr_t t;
 271       mpfr_prec_t Nt;   /* Precision of the intermediary variable */
 272       mpfr_rnd_t rnd1;
 273       mp_size_t size_z;
 274       MPFR_ZIV_DECL (loop);
 275
 276       MPFR_MPZ_SIZEINBASE2 (size_z, z);
 277
 278       /* initial working precision */
 279       Nt = MPFR_PREC (y);
 280       Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
 281       /* ensures Nt >= bits(z)+2 */
 282
 283       /* initialise of intermediary variable */
 284       mpfr_init2 (t, Nt);
 285
 286       /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
 287          toward sign(x), to avoid spurious overflow or underflow. */
 288       rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ :
 289         (MPFR_SIGN (x) > 0 ? MPFR_RNDU : MPFR_RNDD);
 290
 291       MPFR_ZIV_INIT (loop, Nt);
 292       for (;;)
 293         {
 294           MPFR_BLOCK_DECL (flags);
 295
 296           /* compute (1/x)^(-z), -z>0 */
 297           /* As emin = -emax, an underflow cannot occur in the division.
 298              And if an overflow occurs, then this means that x^z overflows
 299              too (since we have rounded toward 1 or -1). */
 300           MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
 301           MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
 302           /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
 303           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
 304             goto overflow;
 305           MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
 306           /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
 307              with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
 308              which is satisfied as soon as Nt >= bits(z)+2, then we can use
 309              Lemma \ref{lemma_graillat} from algorithms.tex, which yields
 310              t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
 311              error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
 312           if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
 313             {
 314             overflow:
 315               MPFR_ZIV_FREE (loop);
 316               mpfr_clear (t);
 317               MPFR_SAVE_EXPO_FREE (expo);
 318               MPFR_LOG_MSG (("overflow\n", 0));
 319               return mpfr_overflow (y, rnd,
 320                                     mpz_odd_p (z) ? MPFR_SIGN (x) :
 321                                     MPFR_SIGN_POS);
 322             }
 323           if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
 324             {
 325               MPFR_ZIV_FREE (loop);
 326               mpfr_clear (t);
 327               MPFR_LOG_MSG (("underflow\n", 0));
 328               if (rnd == MPFR_RNDN)
 329                 {
 330                   mpfr_t y2, zz;
 331
 332                   /* We cannot decide now whether the result should be
 333                      rounded toward zero or away from zero. So, like
 334                      in mpfr_pow_pos_z, let's use the general case of
 335                      mpfr_pow in precision 2. */
 336                   MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
 337                                                   MPFR_EXP (x) - 1) != 0);
 338                   mpfr_init2 (y2, 2);
 339                   mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
 340                   inexact = mpfr_set_z (zz, z, MPFR_RNDN);
 341                   MPFR_ASSERTN (inexact == 0);
 342                   inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
 343                                               (mpfr_save_expo_t *) NULL);
 344                   mpfr_clear (zz);
 345                   mpfr_set (y, y2, MPFR_RNDN);
 346                   mpfr_clear (y2);
 347                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
 348                   goto end;
 349                 }
 350               else
 351                 {
 352                   MPFR_SAVE_EXPO_FREE (expo);
 353                   return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
 354                                          MPFR_SIGN (x) : MPFR_SIGN_POS);
 355                 }
 356             }
 357           if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
 358                                            rnd)))
 359             break;
 360           /* actualisation of the precision */
 361           MPFR_ZIV_NEXT (loop, Nt);
 362           mpfr_set_prec (t, Nt);
 363         }
 364       MPFR_ZIV_FREE (loop);
 365
 366       inexact = mpfr_set (y, t, rnd);
 367       mpfr_clear (t);
 368     }
 369
 370  end:
 371   MPFR_SAVE_EXPO_FREE (expo);
 372   return mpfr_check_range (y, inexact, rnd);
 373 }