contrib/mpfr/src/sin_cos.c

   1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
   2
   3 Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   4 Contributed by the AriC and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
  27    ie, iff x = 0 */
  28 int
  29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
  30 {
  31   mpfr_prec_t prec, m;
  32   int neg, reduce;
  33   mpfr_t c, xr;
  34   mpfr_srcptr xx;
  35   mpfr_exp_t err, expx;
  36   int inexy, inexz;
  37   MPFR_ZIV_DECL (loop);
  38   MPFR_SAVE_EXPO_DECL (expo);
  39
  40   MPFR_ASSERTN (y != z);
  41
  42   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
  43     {
  44       if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
  45         {
  46           MPFR_SET_NAN (y);
  47           MPFR_SET_NAN (z);
  48           MPFR_RET_NAN;
  49         }
  50       else /* x is zero */
  51         {
  52           MPFR_ASSERTD (MPFR_IS_ZERO (x));
  53           MPFR_SET_ZERO (y);
  54           MPFR_SET_SAME_SIGN (y, x);
  55           /* y = 0, thus exact, but z is inexact in case of underflow
  56              or overflow */
  57           inexy = 0; /* y is exact */
  58           inexz = mpfr_set_ui (z, 1, rnd_mode);
  59           return INEX(inexy,inexz);
  60         }
  61     }
  62
  63   MPFR_LOG_FUNC
  64     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
  65      ("sin[%Pu]=%.*Rg cos[%Pu]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
  66       mpfr_get_prec (z), mpfr_log_prec, z));
  67
  68   MPFR_SAVE_EXPO_MARK (expo);
  69
  70   prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
  71   m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
  72   expx = MPFR_GET_EXP (x);
  73
  74   /* When x is close to 0, say 2^(-k), then there is a cancellation of about
  75      2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
  76      to compute sin(x) directly. VL: This is partly done by using
  77      MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
  78      functions. Moreover, any overflow on m is avoided. */
  79   if (expx < 0)
  80     {
  81       /* Warning: in case y = x, and the first call to
  82          MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
  83          we will have clobbered the original value of x.
  84          The workaround is to first compute z = cos(x) in that case, since
  85          y and z are different. */
  86       if (y != x)
  87         /* y and x differ, thus we can safely try to compute y first */
  88         {
  89           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
  90             y, x, -2 * expx, 2, 0, rnd_mode,
  91             { inexy = _inexact;
  92               goto small_input; });
  93           if (0)
  94             {
  95             small_input:
  96               /* we can go here only if we can round sin(x) */
  97               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
  98                 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
  99                 { inexz = _inexact;
 100                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 101                   goto end; });
 102             }
 103
 104           /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
 105              calls failed */
 106         }
 107       else /* y and x are the same variable: try to compute z first, which
 108               necessarily differs */
 109         {
 110           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
 111             z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
 112             { inexz = _inexact;
 113               goto small_input2; });
 114           if (0)
 115             {
 116             small_input2:
 117               /* we can go here only if we can round cos(x) */
 118               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
 119                 y, x, -2 * expx, 2, 0, rnd_mode,
 120                 { inexy = _inexact;
 121                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 122                   goto end; });
 123             }
 124         }
 125       m += 2 * (-expx);
 126     }
 127
 128   if (prec >= MPFR_SINCOS_THRESHOLD)
 129     {
 130       MPFR_SAVE_EXPO_FREE (expo);
 131       return mpfr_sincos_fast (y, z, x, rnd_mode);
 132     }
 133
 134   mpfr_init (c);
 135   mpfr_init (xr);
 136
 137   MPFR_ZIV_INIT (loop, m);
 138   for (;;)
 139     {
 140       /* the following is copied from sin.c */
 141       if (expx >= 2) /* reduce the argument */
 142         {
 143           reduce = 1;
 144           mpfr_set_prec (c, expx + m - 1);
 145           mpfr_set_prec (xr, m);
 146           mpfr_const_pi (c, MPFR_RNDN);
 147           mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
 148           mpfr_remainder (xr, x, c, MPFR_RNDN);
 149           mpfr_div_2ui (c, c, 1, MPFR_RNDN);
 150           if (MPFR_SIGN (xr) > 0)
 151             mpfr_sub (c, c, xr, MPFR_RNDZ);
 152           else
 153             mpfr_add (c, c, xr, MPFR_RNDZ);
 154           if (MPFR_IS_ZERO(xr)
 155               || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
 156               || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
 157             goto next_step;
 158           xx = xr;
 159         }
 160       else /* the input argument is already reduced */
 161         {
 162           reduce = 0;
 163           xx = x;
 164         }
 165
 166       neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
 167       mpfr_set_prec (c, m);
 168       mpfr_cos (c, xx, MPFR_RNDZ);
 169       /* If no argument reduction was performed, the error is at most ulp(c),
 170          otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
 171          ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
 172          case. */
 173       if (reduce == 0)
 174         err = m;
 175       else
 176         err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
 177       if (!mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
 178                            MPFR_PREC (z) + (rnd_mode == MPFR_RNDN)))
 179         goto next_step;
 180
 181       /* we can't set z now, because in case z = x, and the mpfr_can_round()
 182          call below fails, we will have clobbered the input */
 183       mpfr_set_prec (xr, MPFR_PREC(c));
 184       mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
 185       mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
 186                                       2^(5-m) if reduce=1, and by 2^(2-m)
 187                                       otherwise */
 188       mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
 189                                            is 1, and 2^(3-m) otherwise */
 190       mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
 191                                       2^(6-m-Exp(c)) if reduce=1, and
 192                                       2^(3-m-Exp(c)) otherwise */
 193       err = 3 + 3 * reduce - MPFR_GET_EXP (c);
 194       if (neg)
 195         MPFR_CHANGE_SIGN (c);
 196
 197       /* the absolute error on c is at most 2^(err-m), which we must put
 198          in the form 2^(EXP(c)-err). */
 199       err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
 200       if (mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
 201                           MPFR_PREC (y) + (rnd_mode == MPFR_RNDN)))
 202         break;
 203       /* check for huge cancellation */
 204       if (err < (mpfr_exp_t) MPFR_PREC (y))
 205         m += MPFR_PREC (y) - err;
 206       /* Check if near 1 */
 207       if (MPFR_GET_EXP (c) == 1
 208           && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
 209         m += m;
 210
 211     next_step:
 212       MPFR_ZIV_NEXT (loop, m);
 213       mpfr_set_prec (c, m);
 214     }
 215   MPFR_ZIV_FREE (loop);
 216
 217   inexy = mpfr_set (y, c, rnd_mode);
 218   inexz = mpfr_set (z, xr, rnd_mode);
 219
 220   mpfr_clear (c);
 221   mpfr_clear (xr);
 222
 223  end:
 224   MPFR_SAVE_EXPO_FREE (expo);
 225   /* FIXME: add a test for bug before revision 7355 */
 226   inexy = mpfr_check_range (y, inexy, rnd_mode);
 227   inexz = mpfr_check_range (z, inexz, rnd_mode);
 228   MPFR_RET (INEX(inexy,inexz));
 229 }
 230
 231 /*************** asymptotically fast implementation below ********************/
 232
 233 /* truncate Q from R to at most prec bits.
 234    Return the number of truncated bits.
 235  */
 236 static mpfr_prec_t
 237 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
 238 {
 239   mpfr_prec_t l = mpz_sizeinbase (R, 2);
 240
 241   l = (l > prec) ? l - prec : 0;
 242   mpz_fdiv_q_2exp (Q, R, l);
 243   return l;
 244 }
 245
 246 /* truncate S and C so that the smaller has prec bits.
 247    Return the number of truncated bits.
 248  */
 249 static unsigned long
 250 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
 251 {
 252   unsigned long ls = mpz_sizeinbase (S, 2);
 253   unsigned long lc = mpz_sizeinbase (C, 2);
 254   unsigned long l;
 255
 256   l = (ls < lc) ? ls : lc; /* smaller length */
 257   l = (l > prec) ? l - prec : 0;
 258   mpz_fdiv_q_2exp (S, S, l);
 259   mpz_fdiv_q_2exp (C, C, l);
 260   return l;
 261 }
 262
 263 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
 264                      bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
 265    and in    C0/Q0 a rational approximation of cos(X), with relative error
 266                      bounded by 9*2^(-prec) (and also absolute error, since
 267                      |cos(X)| <= 1).
 268    We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
 269    We use the following binary splitting formula:
 270    P(a,b) = (-p)^(b-a)
 271    Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
 272    T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
 273
 274    Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
 275    We do not store the factor 2^r in Q().
 276
 277    Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
 278
 279    Return l such that Q0 has to be multiplied by 2^l.
 280
 281    Assumes prec >= 10.
 282 */
 283 static unsigned long
 284 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
 285             mpfr_prec_t prec)
 286 {
 287   mpz_t T[GMP_NUMB_BITS], Q[GMP_NUMB_BITS], ptoj[GMP_NUMB_BITS], pp;
 288   mpfr_prec_t log2_nb_terms[GMP_NUMB_BITS], mult[GMP_NUMB_BITS];
 289   mpfr_prec_t accu[GMP_NUMB_BITS], size_ptoj[GMP_NUMB_BITS];
 290   mpfr_prec_t prec_i_have, r0 = r;
 291   unsigned long alloc, i, j, k;
 292   mpfr_prec_t l;
 293
 294   if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
 295     {
 296       mpz_set_ui (Q0, 1);
 297       mpz_set_ui (S0, 1);
 298       mpz_set_ui (C0, 1);
 299       return 0;
 300     }
 301
 302   /* check that X=p/2^r <= 1/2 */
 303   MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
 304
 305   mpz_init (pp);
 306
 307   /* normalize p (non-zero here) */
 308   l = mpz_scan1 (p, 0);
 309   mpz_fdiv_q_2exp (pp, p, l); /* p = pp * 2^l */
 310   mpz_mul (pp, pp, pp);
 311   r = 2 * (r - l);            /* x^2 = (p/2^r0)^2 = pp / 2^r */
 312
 313   /* now p is odd */
 314   alloc = 2;
 315   mpz_init_set_ui (T[0], 6);
 316   mpz_init_set_ui (Q[0], 6);
 317   mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
 318   mpz_init (T[1]);
 319   mpz_init (Q[1]);
 320   mpz_init (ptoj[1]);
 321   mpz_mul (ptoj[1], pp, pp);  /* ptoj[1] = pp^2 */
 322   size_ptoj[1] = mpz_sizeinbase (ptoj[1], 2);
 323
 324   mpz_mul_2exp (T[0], T[0], r);
 325   mpz_sub (T[0], T[0], pp);      /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
 326   log2_nb_terms[0] = 1;
 327
 328   /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
 329   mult[0] = r  - mpz_sizeinbase (pp, 2) + r0 - mpz_sizeinbase (p, 2);
 330   /* we have x^3 < 1/2^mult[0] */
 331
 332   for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
 333     {
 334       /* i is even here */
 335       /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
 336          we have already summed terms of index < i
 337          in S[0]/Q[0], ..., S[k]/Q[k] */
 338       k ++;
 339       if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
 340         {
 341           alloc ++;
 342           mpz_init (T[k+1]);
 343           mpz_init (Q[k+1]);
 344           mpz_init (ptoj[k+1]);
 345           mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
 346           size_ptoj[k+1] = mpz_sizeinbase (ptoj[k+1], 2);
 347         }
 348       /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
 349          then                Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
 350          which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
 351          Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
 352       log2_nb_terms[k] = 1;
 353       mpz_set_ui (Q[k], (2 * i + 2) * (2 * i + 3));
 354       mpz_mul_2exp (T[k], Q[k], r);
 355       mpz_sub (T[k], T[k], pp);
 356       mpz_mul_ui (Q[k], Q[k], (2 * i) * (2 * i + 1));
 357       /* the next term of the series is divided by Q[k] and multiplied
 358          by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
 359       mult[k] = mpz_sizeinbase (Q[k], 2) + 2 * r - size_ptoj[1] - 1;
 360       /* the absolute contribution of the next term is 1/2^accu[k] */
 361       accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
 362       prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
 363       j = (i + 2) / 2;
 364       l = 1;
 365       while ((j & 1) == 0) /* combine and reduce */
 366         {
 367           mpz_mul (T[k], T[k], ptoj[l]);
 368           mpz_mul (T[k-1], T[k-1], Q[k]);
 369           mpz_mul_2exp (T[k-1], T[k-1], r << l);
 370           mpz_add (T[k-1], T[k-1], T[k]);
 371           mpz_mul (Q[k-1], Q[k-1], Q[k]);
 372           log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
 373                                     is a power of 2 by construction */
 374           prec_i_have = mpz_sizeinbase (Q[k], 2);
 375           mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
 376           accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
 377           prec_i_have = accu[k-1];
 378           l ++;
 379           j >>= 1;
 380           k --;
 381         }
 382     }
 383
 384   /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
 385      here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
 386   l = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
 387   while (k > 0)
 388     {
 389       j = log2_nb_terms[k-1];
 390       mpz_mul (T[k], T[k], ptoj[j]);
 391       mpz_mul (T[k-1], T[k-1], Q[k]);
 392       l += 1 << log2_nb_terms[k];
 393       mpz_mul_2exp (T[k-1], T[k-1], r * l);
 394       mpz_add (T[k-1], T[k-1], T[k]);
 395       mpz_mul (Q[k-1], Q[k-1], Q[k]);
 396       k--;
 397     }
 398
 399   l = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
 400   /* at this point T[0]/(2^l*Q[0]) is an approximation of sin(x) where the 1st
 401      neglected term has contribution < 1/2^prec, thus since the series has
 402      alternate signs, the error is < 1/2^prec */
 403
 404   /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
 405      which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
 406      [up to a power of two] */
 407   l += reduce (Q0, Q[0], prec);
 408   l -= reduce (T[0], T[0], prec);
 409   /* multiply by x = p/2^l */
 410   mpz_mul (S0, T[0], p);
 411   l -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
 412   /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
 413               |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
 414      |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
 415
 416   mpz_clear (pp);
 417   for (j = 0; j < alloc; j ++)
 418     {
 419       mpz_clear (T[j]);
 420       mpz_clear (Q[j]);
 421       mpz_clear (ptoj[j]);
 422     }
 423
 424   /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
 425      = sqrt(Q0^2*2^(2l)-S0^2)/Q0.
 426      Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
 427      then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
 428                         = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
 429                         = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
 430                           [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
 431
 432                         Since we truncate the square root, we get:
 433                           sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
 434                         = Q*sqrt(cos(X)^2-eps1)+eps2
 435                         = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
 436                         = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
 437                         = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
 438                           since |Q| >= 2^(prec-1) */
 439   /* we assume that Q0*2^l >= 2^(prec-1) */
 440   MPFR_ASSERTN(l + mpz_sizeinbase (Q0, 2) >= prec);
 441   mpz_mul (C0, Q0, Q0);
 442   mpz_mul_2exp (C0, C0, 2 * l);
 443   mpz_submul (C0, S0, S0);
 444   mpz_sqrt (C0, C0);
 445
 446   return l;
 447 }
 448
 449 /* Put in s and c approximations of sin(x) and cos(x) respectively.
 450    Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
 451    Return err such that the relative error is bounded by 2^err ulps.
 452 */
 453 static int
 454 sincos_aux (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
 455 {
 456   mpfr_prec_t prec_s, sh;
 457   mpz_t Q, S, C, Q2, S2, C2, y;
 458   mpfr_t x2;
 459   unsigned long l, l2, j, err;
 460
 461   MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
 462
 463   prec_s = MPFR_PREC(s);
 464
 465   mpfr_init2 (x2, MPFR_PREC(x));
 466   mpz_init (Q);
 467   mpz_init (S);
 468   mpz_init (C);
 469   mpz_init (Q2);
 470   mpz_init (S2);
 471   mpz_init (C2);
 472   mpz_init (y);
 473
 474   mpfr_set (x2, x, MPFR_RNDN); /* exact */
 475   mpz_set_ui (Q, 1);
 476   l = 0;
 477   mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
 478   mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
 479
 480   /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
 481      S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
 482      'sh-1' is the number of already shifted bits in x2.
 483   */
 484
 485   for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
 486     {
 487       if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
 488         {
 489           l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
 490           l2 += sh - 1;
 491           mpz_set_ui (Q2, 1);
 492           mpz_set_ui (C2, 1);
 493           mpz_mul_2exp (C2, C2, l2);
 494           mpfr_set_ui (x2, 0, MPFR_RNDN);
 495         }
 496       else
 497         {
 498           /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
 499           mpfr_mul_2exp (x2, x2, sh, MPFR_RNDN); /* exact */
 500           mpfr_get_z (y, x2, MPFR_RNDZ); /* round towards zero: now
 501                                            0 <= x2 < 2^sh, thus
 502                                            0 <= x2/2^(sh-1) < 2^(1-sh) */
 503           if (mpz_cmp_ui (y, 0) == 0)
 504             continue;
 505           mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
 506           l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
 507           /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
 508              and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
 509         }
 510       if (sh == 1) /* S=0, C=1 */
 511         {
 512           l = l2;
 513           mpz_swap (Q, Q2);
 514           mpz_swap (S, S2);
 515           mpz_swap (C, C2);
 516         }
 517       else
 518         {
 519           /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
 520              a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
 521              s <- t - d - e, c <- e - d */
 522           mpz_add (y, S, C); /* a */
 523           mpz_mul (C, C, C2); /* e */
 524           mpz_add (C2, C2, S2); /* b */
 525           mpz_mul (S2, S, S2); /* d */
 526           mpz_mul (y, y, C2); /* a*b */
 527           mpz_sub (S, y, S2); /* t - d */
 528           mpz_sub (S, S, C); /* t - d - e */
 529           mpz_sub (C, C, S2); /* e - d */
 530           mpz_mul (Q, Q, Q2);
 531           /* after j loops, the error is <= (11j-2)*2^(prec_s) */
 532           l += l2;
 533           /* reduce Q to prec_s bits */
 534           l += reduce (Q, Q, prec_s);
 535           /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
 536           l -= reduce2 (S, C, prec_s);
 537         }
 538     }
 539
 540   j = 11 * j;
 541   for (err = 0; j > 1; j = (j + 1) / 2, err ++);
 542
 543   mpfr_set_z (s, S, MPFR_RNDN);
 544   mpfr_div_z (s, s, Q, MPFR_RNDN);
 545   mpfr_div_2exp (s, s, l, MPFR_RNDN);
 546
 547   mpfr_set_z (c, C, MPFR_RNDN);
 548   mpfr_div_z (c, c, Q, MPFR_RNDN);
 549   mpfr_div_2exp (c, c, l, MPFR_RNDN);
 550
 551   mpz_clear (Q);
 552   mpz_clear (S);
 553   mpz_clear (C);
 554   mpz_clear (Q2);
 555   mpz_clear (S2);
 556   mpz_clear (C2);
 557   mpz_clear (y);
 558   mpfr_clear (x2);
 559   return err;
 560 }
 561
 562 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
 563    One of s and c might be NULL, in which case the corresponding value is
 564    not computed.
 565    Assumes s differs from c.
 566  */
 567 int
 568 mpfr_sincos_fast (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd)
 569 {
 570   int inexs, inexc;
 571   mpfr_t x_red, ts, tc;
 572   mpfr_prec_t w;
 573   mpfr_exp_t err, errs, errc;
 574   MPFR_ZIV_DECL (loop);
 575
 576   MPFR_ASSERTN(s != c);
 577   if (s == NULL)
 578     w = MPFR_PREC(c);
 579   else if (c == NULL)
 580     w = MPFR_PREC(s);
 581   else
 582     w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
 583   w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
 584   mpfr_init2 (ts, w);
 585   mpfr_init2 (tc, w);
 586
 587   MPFR_ZIV_INIT (loop, w);
 588   for (;;)
 589     {
 590       /* if 0 < x <= Pi/4, we can call sincos_aux directly */
 591       if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
 592         {
 593           err = sincos_aux (ts, tc, x, MPFR_RNDN);
 594         }
 595       /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
 596       else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
 597         {
 598           mpfr_init2 (x_red, MPFR_PREC(x));
 599           mpfr_neg (x_red, x, rnd); /* exact */
 600           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
 601           mpfr_neg (ts, ts, MPFR_RNDN);
 602           mpfr_clear (x_red);
 603         }
 604       else /* argument reduction is needed */
 605         {
 606           long q;
 607           mpfr_t pi;
 608           int neg = 0;
 609
 610           mpfr_init2 (x_red, w);
 611           mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
 612           mpfr_const_pi (pi, MPFR_RNDN);
 613           mpfr_div_2exp (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
 614           mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
 615           /* x = q * (Pi/2 + eps1) + x_red + eps2,
 616              where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
 617              and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
 618              Since |q| <= x/(Pi/2) <= |x|, we have
 619              q*|eps1| <= 2^(-w), thus
 620              |x - q * Pi/2 - x_red| <= 2^(1-w) */
 621           /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
 622           if (MPFR_IS_NEG(x_red))
 623             {
 624               mpfr_neg (x_red, x_red, MPFR_RNDN);
 625               neg = 1;
 626             }
 627           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
 628           err ++; /* to take into account the argument reduction */
 629           if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
 630             mpfr_neg (ts, ts, MPFR_RNDN);
 631           if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
 632             {
 633               mpfr_neg (ts, ts, MPFR_RNDN);
 634               mpfr_neg (tc, tc, MPFR_RNDN);
 635             }
 636           if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
 637             {
 638               mpfr_neg (ts, ts, MPFR_RNDN);
 639               mpfr_swap (ts, tc);
 640             }
 641           mpfr_clear (x_red);
 642           mpfr_clear (pi);
 643         }
 644       /* adjust errors with respect to absolute values */
 645       errs = err - MPFR_EXP(ts);
 646       errc = err - MPFR_EXP(tc);
 647       if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
 648           (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
 649         break;
 650       MPFR_ZIV_NEXT (loop, w);
 651       mpfr_set_prec (ts, w);
 652       mpfr_set_prec (tc, w);
 653     }
 654   MPFR_ZIV_FREE (loop);
 655
 656   inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
 657   inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
 658
 659   mpfr_clear (ts);
 660   mpfr_clear (tc);
 661   return INEX(inexs,inexc);
 662 }