contrib/mpfr/src/fma.c

   1 /* mpfr_fma -- Floating multiply-add
   2
   3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #include "mpfr-impl.h"
  24
  25 /* The fused-multiply-add (fma) of x, y and z is defined by:
  26    fma(x,y,z)= x*y + z
  27 */
  28
  29 int
  30 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
  31           mpfr_rnd_t rnd_mode)
  32 {
  33   int inexact;
  34   mpfr_t u;
  35   MPFR_SAVE_EXPO_DECL (expo);
  36   MPFR_GROUP_DECL(group);
  37
  38   MPFR_LOG_FUNC
  39     (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
  40       mpfr_get_prec (x), mpfr_log_prec, x,
  41       mpfr_get_prec (y), mpfr_log_prec, y,
  42       mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
  43      ("s[%Pu]=%.*Rg inexact=%d",
  44       mpfr_get_prec (s), mpfr_log_prec, s, inexact));
  45
  46   /* particular cases */
  47   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
  48                      MPFR_IS_SINGULAR(y) ||
  49                      MPFR_IS_SINGULAR(z) ))
  50     {
  51       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
  52         {
  53           MPFR_SET_NAN(s);
  54           MPFR_RET_NAN;
  55         }
  56       /* now neither x, y or z is NaN */
  57       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
  58         {
  59           /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
  60           if ((MPFR_IS_ZERO(y)) ||
  61               (MPFR_IS_ZERO(x)) ||
  62               (MPFR_IS_INF(z) &&
  63                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
  64             {
  65               MPFR_SET_NAN(s);
  66               MPFR_RET_NAN;
  67             }
  68           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
  69             {
  70               MPFR_SET_INF(s);
  71               MPFR_SET_SAME_SIGN(s, z);
  72               MPFR_RET(0);
  73             }
  74           else /* z is finite */
  75             {
  76               MPFR_SET_INF(s);
  77               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
  78               MPFR_RET(0);
  79             }
  80         }
  81       /* now x and y are finite */
  82       else if (MPFR_IS_INF(z))
  83         {
  84           MPFR_SET_INF(s);
  85           MPFR_SET_SAME_SIGN(s, z);
  86           MPFR_RET(0);
  87         }
  88       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
  89         {
  90           if (MPFR_IS_ZERO(z))
  91             {
  92               int sign_p;
  93               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
  94               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
  95                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
  96                                 ? -1 : 1) :
  97                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
  98                                 ? 1 : -1)));
  99               MPFR_SET_ZERO(s);
 100               MPFR_RET(0);
 101             }
 102           else
 103             return mpfr_set (s, z, rnd_mode);
 104         }
 105       else /* necessarily z is zero here */
 106         {
 107           MPFR_ASSERTD(MPFR_IS_ZERO(z));
 108           return mpfr_mul (s, x, y, rnd_mode);
 109         }
 110     }
 111
 112   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
 113      is exact, except in case of overflow or underflow. */
 114   MPFR_SAVE_EXPO_MARK (expo);
 115   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
 116
 117   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
 118     {
 119       /* overflow or underflow - this case is regarded as rare, thus
 120          does not need to be very efficient (even if some tests below
 121          could have been done earlier).
 122          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
 123          Alternatively, we could test the overflow flag, but in this case,
 124          mpfr_clear_flags would have been necessary. */
 125       if (MPFR_IS_INF (u))  /* overflow */
 126         {
 127           /* Let's eliminate the obvious case where x*y and z have the
 128              same sign. No possible cancellation -> real overflow.
 129              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
 130              then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
 131              is also an overflow. */
 132           if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
 133               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
 134             {
 135               MPFR_GROUP_CLEAR (group);
 136               MPFR_SAVE_EXPO_FREE (expo);
 137               return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
 138             }
 139
 140           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
 141              (x/4)*y does not overflow (let's recall that the result
 142              is exact with an unbounded exponent range). It does not
 143              underflow either, because x*y overflows and the exponent
 144              range is large enough. */
 145           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
 146           MPFR_ASSERTN (inexact == 0);
 147           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
 148           MPFR_ASSERTN (inexact == 0);
 149
 150           /* Now, we need to add z/4... But it may underflow! */
 151           {
 152             mpfr_t zo4;
 153             mpfr_srcptr zz;
 154             MPFR_BLOCK_DECL (flags);
 155
 156             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
 157                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
 158               {
 159                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
 160                 zz = z;
 161               }
 162             else
 163               {
 164                 mpfr_init2 (zo4, MPFR_PREC (z));
 165                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
 166                   {
 167                     /* The division by 4 underflowed! */
 168                     MPFR_ASSERTN (0); /* TODO... */
 169                   }
 170                 zz = zo4;
 171               }
 172
 173             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
 174                following addition would give the same result). */
 175             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
 176             /* u and zz have different signs, so that an overflow
 177                is not possible. But an underflow is theoretically
 178                possible! */
 179             if (MPFR_UNDERFLOW (flags))
 180               {
 181                 MPFR_ASSERTN (zz != z);
 182                 MPFR_ASSERTN (0); /* TODO... */
 183                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
 184               }
 185             else
 186               {
 187                 int inex2;
 188
 189                 if (zz != z)
 190                   mpfr_clear (zo4);
 191                 MPFR_GROUP_CLEAR (group);
 192                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 193                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
 194                 if (inex2)  /* overflow */
 195                   {
 196                     inexact = inex2;
 197                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 198                   }
 199                 goto end;
 200               }
 201           }
 202         }
 203       else  /* underflow: one has |xy| < 2^(emin-1). */
 204         {
 205           unsigned long scale = 0;
 206           mpfr_t scaled_z;
 207           mpfr_srcptr new_z;
 208           mpfr_exp_t diffexp;
 209           mpfr_prec_t pzs;
 210           int xy_underflows;
 211
 212           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
 213              (the + 1 on MPFR_PREC (s) is necessary because the exponent
 214              of the result can be EXP(z) - 1). */
 215           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
 216           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
 217           if (diffexp <= pzs)
 218             {
 219               mpfr_uexp_t uscale;
 220               mpfr_t scaled_v;
 221               MPFR_BLOCK_DECL (flags);
 222
 223               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
 224               MPFR_ASSERTN (uscale > 0);
 225               MPFR_ASSERTN (uscale <= ULONG_MAX);
 226               scale = uscale;
 227               mpfr_init2 (scaled_z, MPFR_PREC (z));
 228               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
 229               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
 230               new_z = scaled_z;
 231               /* Now we need to recompute u = xy * 2^scale. */
 232               MPFR_BLOCK (flags,
 233                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
 234                             {
 235                               mpfr_init2 (scaled_v, MPFR_PREC (x));
 236                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
 237                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
 238                             }
 239                           else
 240                             {
 241                               mpfr_init2 (scaled_v, MPFR_PREC (y));
 242                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
 243                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
 244                             });
 245               mpfr_clear (scaled_v);
 246               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 247               xy_underflows = MPFR_UNDERFLOW (flags);
 248             }
 249           else
 250             {
 251               new_z = z;
 252               xy_underflows = 1;
 253             }
 254
 255           if (xy_underflows)
 256             {
 257               /* Let's replace xy by sign(xy) * 2^(emin-1). */
 258               MPFR_PREC (u) = MPFR_PREC_MIN;
 259               mpfr_setmin (u, __gmpfr_emin);
 260               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
 261                                                 MPFR_SIGN (y)));
 262             }
 263
 264           {
 265             MPFR_BLOCK_DECL (flags);
 266
 267             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
 268             MPFR_GROUP_CLEAR (group);
 269             if (scale != 0)
 270               {
 271                 int inex2;
 272
 273                 mpfr_clear (scaled_z);
 274                 /* Here an overflow is theoretically possible, in which case
 275                    the result may be wrong, hence the assert. An underflow
 276                    is not possible, but let's check that anyway. */
 277                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
 278                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
 279                 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
 280                 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
 281                    Also, handle the double rounding case:
 282                    s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
 283                 if (inex2)  /* underflow */
 284                   inexact = inex2;
 285               }
 286           }
 287
 288           /* FIXME/TODO: I'm not sure that the following is correct.
 289              Check for possible spurious exceptions due to intermediate
 290              computations. */
 291           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 292           goto end;
 293         }
 294     }
 295
 296   inexact = mpfr_add (s, u, z, rnd_mode);
 297   MPFR_GROUP_CLEAR (group);
 298   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 299  end:
 300   MPFR_SAVE_EXPO_FREE (expo);
 301   return mpfr_check_range (s, inexact, rnd_mode);
 302 }