contrib/openbsd_libm/src/s_fma.c

   1 /*      $OpenBSD: s_fma.c,v 1.6 2013/11/12 19:00:38 martynas Exp $      */
   2
   3 /*-
   4  * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <fenv.h>
  30 #include <float.h>
  31 #include <math.h>
  32
  33 /*
  34  * Fused multiply-add: Compute x * y + z with a single rounding error.
  35  *
  36  * We use scaling to avoid overflow/underflow, along with the
  37  * canonical precision-doubling technique adapted from:
  38  *
  39  *      Dekker, T.  A Floating-Point Technique for Extending the
  40  *      Available Precision.  Numer. Math. 18, 224-242 (1971).
  41  *
  42  * This algorithm is sensitive to the rounding precision.  FPUs such
  43  * as the i387 must be set in double-precision mode if variables are
  44  * to be stored in FP registers in order to avoid incorrect results.
  45  * This is the default on FreeBSD, but not on many other systems.
  46  *
  47  * Hardware instructions should be used on architectures that support it,
  48  * since this implementation will likely be several times slower.
  49  */
  50 #if LDBL_MANT_DIG != 113
  51 double
  52 fma(double x, double y, double z)
  53 {
  54         static const double split = 0x1p27 + 1.0;
  55         double xs, ys, zs;
  56         double c, cc, hx, hy, p, q, tx, ty;
  57         double r, rr, s;
  58         int oround;
  59         int ex, ey, ez;
  60         int spread;
  61
  62         /*
  63          * Handle special cases. The order of operations and the particular
  64          * return values here are crucial in handling special cases involving
  65          * infinities, NaNs, overflows, and signed zeroes correctly.
  66          */
  67         if (x == 0.0 || y == 0.0)
  68                 return (x * y + z);
  69         if (z == 0.0)
  70                 return (x * y);
  71         if (!isfinite(x) || !isfinite(y))
  72                 return (x * y + z);
  73         if (!isfinite(z))
  74                 return (z);
  75
  76         xs = frexp(x, &ex);
  77         ys = frexp(y, &ey);
  78         zs = frexp(z, &ez);
  79         oround = fegetround();
  80         spread = ex + ey - ez;
  81
  82         /*
  83          * If x * y and z are many orders of magnitude apart, the scaling
  84          * will overflow, so we handle these cases specially.  Rounding
  85          * modes other than FE_TONEAREST are painful.
  86          */
  87         if (spread > DBL_MANT_DIG * 2) {
  88                 fenv_t env;
  89                 feraiseexcept(FE_INEXACT);
  90                 switch(oround) {
  91                 case FE_TONEAREST:
  92                         return (x * y);
  93                 case FE_TOWARDZERO:
  94                         if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
  95                                 return (x * y);
  96                         feholdexcept(&env);
  97                         r = x * y;
  98                         if (!fetestexcept(FE_INEXACT))
  99                                 r = nextafter(r, 0);
 100                         feupdateenv(&env);
 101                         return (r);
 102                 case FE_DOWNWARD:
 103                         if (z > 0.0)
 104                                 return (x * y);
 105                         feholdexcept(&env);
 106                         r = x * y;
 107                         if (!fetestexcept(FE_INEXACT))
 108                                 r = nextafter(r, -INFINITY);
 109                         feupdateenv(&env);
 110                         return (r);
 111                 default:        /* FE_UPWARD */
 112                         if (z < 0.0)
 113                                 return (x * y);
 114                         feholdexcept(&env);
 115                         r = x * y;
 116                         if (!fetestexcept(FE_INEXACT))
 117                                 r = nextafter(r, INFINITY);
 118                         feupdateenv(&env);
 119                         return (r);
 120                 }
 121         }
 122         if (spread < -DBL_MANT_DIG) {
 123                 feraiseexcept(FE_INEXACT);
 124                 if (!isnormal(z))
 125                         feraiseexcept(FE_UNDERFLOW);
 126                 switch (oround) {
 127                 case FE_TONEAREST:
 128                         return (z);
 129                 case FE_TOWARDZERO:
 130                         if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
 131                                 return (z);
 132                         else
 133                                 return (nextafter(z, 0));
 134                 case FE_DOWNWARD:
 135                         if ((x > 0.0) ^ (y < 0.0))
 136                                 return (z);
 137                         else
 138                                 return (nextafter(z, -INFINITY));
 139                 default:        /* FE_UPWARD */
 140                         if ((x > 0.0) ^ (y < 0.0))
 141                                 return (nextafter(z, INFINITY));
 142                         else
 143                                 return (z);
 144                 }
 145         }
 146
 147         /*
 148          * Use Dekker's algorithm to perform the multiplication and
 149          * subsequent addition in twice the machine precision.
 150          * Arrange so that x * y = c + cc, and x * y + z = r + rr.
 151          */
 152         fesetround(FE_TONEAREST);
 153
 154         p = xs * split;
 155         hx = xs - p;
 156         hx += p;
 157         tx = xs - hx;
 158
 159         p = ys * split;
 160         hy = ys - p;
 161         hy += p;
 162         ty = ys - hy;
 163
 164         p = hx * hy;
 165         q = hx * ty + tx * hy;
 166         c = p + q;
 167         cc = p - c + q + tx * ty;
 168
 169         zs = ldexp(zs, -spread);
 170         r = c + zs;
 171         s = r - c;
 172         rr = (c - (r - s)) + (zs - s) + cc;
 173
 174         spread = ex + ey;
 175         if (spread + ilogb(r) > -1023) {
 176                 fesetround(oround);
 177                 r = r + rr;
 178         } else {
 179                 /*
 180                  * The result is subnormal, so we round before scaling to
 181                  * avoid double rounding.
 182                  */
 183                 p = ldexp(copysign(0x1p-1022, r), -spread);
 184                 c = r + p;
 185                 s = c - r;
 186                 cc = (r - (c - s)) + (p - s) + rr;
 187                 fesetround(oround);
 188                 r = (c + cc) - p;
 189         }
 190         return (ldexp(r, spread));
 191 }
 192 #else   /* LDBL_MANT_DIG == 113 */
 193 /*
 194  * 113 bits of precision is more than twice the precision of a double,
 195  * so it is enough to represent the intermediate product exactly.
 196  */
 197 double
 198 fma(double x, double y, double z)
 199 {
 200         return ((long double)x * y + z);
 201 }
 202 #endif  /* LDBL_MANT_DIG != 113 */
 203
/*
 * When long double has the same significand width as double, fmal is tied
 * to fma instead of getting its own implementation.
 * NOTE(review): __strong_alias is not defined in this file; presumably it
 * emits fmal as a strong alias symbol for fma -- confirm against the
 * platform's cdefs header.
 */
#if     LDBL_MANT_DIG == DBL_MANT_DIG
__strong_alias(fmal, fma);
#endif  /* LDBL_MANT_DIG == DBL_MANT_DIG */