1 /* mpfr_fma -- Floating multiply-add
3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #include "mpfr-impl.h"
25 /* The fused-multiply-add (fma) of x, y and z is defined by:
30 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
35 MPFR_SAVE_EXPO_DECL (expo);
36 MPFR_GROUP_DECL(group);
39 (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
40 mpfr_get_prec (x), mpfr_log_prec, x,
41 mpfr_get_prec (y), mpfr_log_prec, y,
42 mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
43 ("s[%Pu]=%.*Rg inexact=%d",
44 mpfr_get_prec (s), mpfr_log_prec, s, inexact));
46 /* particular cases */
47 if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
48 MPFR_IS_SINGULAR(y) ||
49 MPFR_IS_SINGULAR(z) ))
51 if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
56 /* now neither x, y or z is NaN */
57 else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
59 /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
60 if ((MPFR_IS_ZERO(y)) ||
63 ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
68 else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
71 MPFR_SET_SAME_SIGN(s, z);
74 else /* z is finite */
77 MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
81 /* now x and y are finite */
82 else if (MPFR_IS_INF(z))
85 MPFR_SET_SAME_SIGN(s, z);
88 else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
93 sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
94 MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
95 ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
97 ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
103 return mpfr_set (s, z, rnd_mode);
105 else /* necessarily z is zero here */
107 MPFR_ASSERTD(MPFR_IS_ZERO(z));
108 return mpfr_mul (s, x, y, rnd_mode);
112 /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
113 is exact, except in case of overflow or underflow. */
114 MPFR_SAVE_EXPO_MARK (expo);
115 MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
117 if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
119 /* overflow or underflow - this case is regarded as rare, thus
120 does not need to be very efficient (even if some tests below
121 could have been done earlier).
122 It is an overflow iff u is an infinity (since MPFR_RNDN was used).
123 Alternatively, we could test the overflow flag, but in this case,
124 mpfr_clear_flags would have been necessary. */
125 if (MPFR_IS_INF (u)) /* overflow */
127 /* Let's eliminate the obvious case where x*y and z have the
128 same sign. No possible cancellation -> real overflow.
129 Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
130 then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
131 is also an overflow. */
132 if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
133 MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
135 MPFR_GROUP_CLEAR (group);
136 MPFR_SAVE_EXPO_FREE (expo);
137 return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
140 /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
141 (x/4)*y does not overflow (let's recall that the result
142 is exact with an unbounded exponent range). It does not
143 underflow either, because x*y overflows and the exponent
144 range is large enough. */
145 inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
146 MPFR_ASSERTN (inexact == 0);
147 inexact = mpfr_mul (u, u, y, MPFR_RNDN);
148 MPFR_ASSERTN (inexact == 0);
150 /* Now, we need to add z/4... But it may underflow! */
154 MPFR_BLOCK_DECL (flags);
156 if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
157 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
159 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
164 mpfr_init2 (zo4, MPFR_PREC (z));
165 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
167 /* The division by 4 underflowed! */
168 MPFR_ASSERTN (0); /* TODO... */
173 /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
174 following addition would give the same result). */
175 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
176 /* u and zz have different signs, so that an overflow
177 is not possible. But an underflow is theoretically
179 if (MPFR_UNDERFLOW (flags))
181 MPFR_ASSERTN (zz != z);
182 MPFR_ASSERTN (0); /* TODO... */
183 mpfr_clears (zo4, u, (mpfr_ptr) 0);
191 MPFR_GROUP_CLEAR (group);
192 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
193 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
194 if (inex2) /* overflow */
197 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
203 else /* underflow: one has |xy| < 2^(emin-1). */
205 unsigned long scale = 0;
212 /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
213 (the + 1 on MPFR_PREC (s) is necessary because the exponent
214 of the result can be EXP(z) - 1). */
215 diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
216 pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
221 MPFR_BLOCK_DECL (flags);
223 uscale = (mpfr_uexp_t) pzs - diffexp + 1;
224 MPFR_ASSERTN (uscale > 0);
225 MPFR_ASSERTN (uscale <= ULONG_MAX);
227 mpfr_init2 (scaled_z, MPFR_PREC (z));
228 inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
229 MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
231 /* Now we need to recompute u = xy * 2^scale. */
233 if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
235 mpfr_init2 (scaled_v, MPFR_PREC (x));
236 mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
237 mpfr_mul (u, scaled_v, y, MPFR_RNDN);
241 mpfr_init2 (scaled_v, MPFR_PREC (y));
242 mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
243 mpfr_mul (u, x, scaled_v, MPFR_RNDN);
245 mpfr_clear (scaled_v);
246 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
247 xy_underflows = MPFR_UNDERFLOW (flags);
257 /* Let's replace xy by sign(xy) * 2^(emin-1). */
258 MPFR_PREC (u) = MPFR_PREC_MIN;
259 mpfr_setmin (u, __gmpfr_emin);
260 MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
265 MPFR_BLOCK_DECL (flags);
267 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
268 MPFR_GROUP_CLEAR (group);
273 mpfr_clear (scaled_z);
274 /* Here an overflow is theoretically possible, in which case
275 the result may be wrong, hence the assert. An underflow
276 is not possible, but let's check that anyway. */
277 MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
278 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
279 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
280 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
281 Also, handle the double rounding case:
282 s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
283 if (inex2) /* underflow */
288 /* FIXME/TODO: I'm not sure that the following is correct.
289 Check for possible spurious exceptions due to intermediate
291 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
296 inexact = mpfr_add (s, u, z, rnd_mode);
297 MPFR_GROUP_CLEAR (group);
298 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
300 MPFR_SAVE_EXPO_FREE (expo);
301 return mpfr_check_range (s, inexact, rnd_mode);