contrib/mpfr/atan.c

   1 /* mpfr_atan -- arc-tangent of a floating-point number
   2
   3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library, and was contributed by Mathieu Dutour.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 2.1 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  21 MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms
  27    for the series expansion, with an error of at most 1 ulp.
  28    Assumes |x| < 1.
  29
  30    If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ...
  31 */
  32 static void
  33 mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab)
  34 {
  35   mpz_t *S, *Q, *ptoj;
  36   unsigned long n, i, k, j, l;
  37   mp_exp_t diff, expo;
  38   int im, done;
  39   mp_prec_t mult, *accu, *log2_nb_terms;
  40   mp_prec_t precy = MPFR_PREC(y);
  41
  42   if (mpz_cmp_ui (p, 0) == 0)
  43     {
  44       mpfr_set_ui (y, 1, GMP_RNDN); /* limit(atan(x)/x, x=0) */
  45       return;
  46     }
  47
  48   accu = (mp_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mp_prec_t));
  49   log2_nb_terms = accu + m + 1;
  50
  51   /* Set Tables */
  52   S    = tab;           /* S */
  53   ptoj = S + 1*(m+1);   /* p^2^j Precomputed table */
  54   Q    = S + 2*(m+1);   /* Product of Odd integer  table  */
  55
  56   /* From p to p^2, and r to 2r */
  57   mpz_mul (p, p, p);
  58   MPFR_ASSERTD (2 * r > r);
  59   r = 2 * r;
  60
  61   /* Normalize p */
  62   n = mpz_scan1 (p, 0);
  63   mpz_tdiv_q_2exp (p, p, n); /* exact */
  64   MPFR_ASSERTD (r > n);
  65   r -= n;
  66   /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */
  67
  68   MPFR_ASSERTD (mpz_sgn (p) > 0);
  69   MPFR_ASSERTD (m > 0);
  70
  71   /* check if p=1 (special case) */
  72   l = 0;
  73   /*
  74     We compute by binary splitting, with X = x^2 = p/2^r:
  75     P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
  76     Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
  77     S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise
  78     Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough.
  79     The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it
  80     into account when we compute with Q.
  81   */
  82   accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the
  83                   number of bits of the corresponding term S[j]/Q[j] */
  84   if (mpz_cmp_ui (p, 1) != 0)
  85     {
  86       /* p <> 1: precompute ptoj table */
  87       mpz_set (ptoj[0], p);
  88       for (im = 1 ; im <= m ; im ++)
  89         mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]);
  90       /* main loop */
  91       n = 1UL << m;
  92       /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when
  93          p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */
  94       for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++)
  95         {
  96           /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */
  97           mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */
  98           mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */
  99           mpz_mul_2exp (S[k], Q[k+1], r);
 100           mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */
 101           mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */
 102           log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
 103           for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --)
 104             {
 105               /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond
 106                  to 2^l terms each. We combine them into S[k-1]/Q[k-1] */
 107               MPFR_ASSERTD (k > 0);
 108               mpz_mul (S[k], S[k], Q[k-1]);
 109               mpz_mul (S[k], S[k], ptoj[l]);
 110               mpz_mul (S[k-1], S[k-1], Q[k]);
 111               mpz_mul_2exp (S[k-1], S[k-1], r << l);
 112               mpz_add (S[k-1], S[k-1], S[k]);
 113               mpz_mul (Q[k-1], Q[k-1], Q[k]);
 114               log2_nb_terms[k-1] = l + 1;
 115               /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */
 116               MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]);
 117               /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */
 118               mult = (r << (l + 1)) - mult - 1;
 119               accu[k-1] = (k == 1) ? mult : accu[k-2] + mult;
 120               if (accu[k-1] > precy)
 121                 done = 1;
 122             }
 123         }
 124     }
 125   else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r,
 126           we can stop when r*i > precy i.e. i > precy/r */
 127     {
 128       n = 1UL << m;
 129       for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++)
 130         {
 131           mpz_set_ui (Q[k + 1], 2 * i + 3);
 132           mpz_mul_2exp (S[k], Q[k+1], r);
 133           mpz_sub_ui (S[k], S[k], 1 + 2 * i);
 134           mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i);
 135           log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
 136           for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --)
 137             {
 138               MPFR_ASSERTD (k > 0);
 139               mpz_mul (S[k], S[k], Q[k-1]);
 140               mpz_mul (S[k-1], S[k-1], Q[k]);
 141               mpz_mul_2exp (S[k-1], S[k-1], r << l);
 142               mpz_add (S[k-1], S[k-1], S[k]);
 143               mpz_mul (Q[k-1], Q[k-1], Q[k]);
 144               log2_nb_terms[k-1] = l + 1;
 145             }
 146         }
 147     }
 148
 149   /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */
 150   l = 0; /* number of terms accumulated in S[k]/Q[k] */
 151   while (k > 1)
 152     {
 153       k --;
 154       /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */
 155       j = log2_nb_terms[k-1];
 156       mpz_mul (S[k], S[k], Q[k-1]);
 157       if (mpz_cmp_ui (p, 1) != 0)
 158         mpz_mul (S[k], S[k], ptoj[j]);
 159       mpz_mul (S[k-1], S[k-1], Q[k]);
 160       l += 1 << log2_nb_terms[k];
 161       mpz_mul_2exp (S[k-1], S[k-1], r * l);
 162       mpz_add (S[k-1], S[k-1], S[k]);
 163       mpz_mul (Q[k-1], Q[k-1], Q[k]);
 164     }
 165   (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mp_prec_t));
 166
 167   MPFR_MPZ_SIZEINBASE2 (diff, S[0]);
 168   diff -= 2 * precy;
 169   expo = diff;
 170   if (diff >= 0)
 171     mpz_tdiv_q_2exp (S[0], S[0], diff);
 172   else
 173     mpz_mul_2exp (S[0], S[0], -diff);
 174
 175   MPFR_MPZ_SIZEINBASE2 (diff, Q[0]);
 176   diff -= precy;
 177   expo -= diff;
 178   if (diff >= 0)
 179     mpz_tdiv_q_2exp (Q[0], Q[0], diff);
 180   else
 181     mpz_mul_2exp (Q[0], Q[0], -diff);
 182
 183   mpz_tdiv_q (S[0], S[0], Q[0]);
 184   mpfr_set_z (y, S[0], GMP_RNDD);
 185   MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1));
 186 }
 187
 188 int
 189 mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
 190 {
 191   mpfr_t xp, arctgt, sk, tmp, tmp2;
 192   mpz_t  ukz;
 193   mpz_t *tabz;
 194   mp_exp_t exptol;
 195   mp_prec_t prec, realprec;
 196   unsigned long twopoweri;
 197   int comparaison, inexact, inexact2;
 198   int i, n0, oldn0;
 199   MPFR_GROUP_DECL (group);
 200   MPFR_SAVE_EXPO_DECL (expo);
 201   MPFR_ZIV_DECL (loop);
 202
 203   MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
 204                  ("atan[%#R]=%R inexact=%d", atan, atan, inexact));
 205
 206   /* Singular cases */
 207   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 208     {
 209       if (MPFR_IS_NAN (x))
 210         {
 211           MPFR_SET_NAN (atan);
 212           MPFR_RET_NAN;
 213         }
 214       else if (MPFR_IS_INF (x))
 215         {
 216           if (MPFR_IS_POS (x))  /* arctan(+inf) = Pi/2 */
 217             inexact = mpfr_const_pi (atan, rnd_mode);
 218           else /* arctan(-inf) = -Pi/2 */
 219             {
 220               inexact = -mpfr_const_pi (atan,
 221                                         MPFR_INVERT_RND (rnd_mode));
 222               MPFR_CHANGE_SIGN (atan);
 223             }
 224           inexact2 = mpfr_div_2ui (atan, atan, 1, rnd_mode);
 225           if (MPFR_UNLIKELY (inexact2))
 226             inexact = inexact2; /* An underflow occurs */
 227           MPFR_RET (inexact);
 228         }
 229       else /* x is necessarily 0 */
 230         {
 231           MPFR_ASSERTD (MPFR_IS_ZERO (x));
 232           MPFR_SET_ZERO (atan);
 233           MPFR_SET_SAME_SIGN (atan, x);
 234           MPFR_RET (0);
 235         }
 236     }
 237
 238   /* atan(x) = x - x^3/3 + x^5/5...
 239      so the error is < 2^(3*EXP(x)-1)
 240      so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */
 241   MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0,
 242                                     rnd_mode, {});
 243
 244   /* Set x_p=|x| */
 245   MPFR_TMP_INIT_ABS (xp, x);
 246
 247   /* Other simple case arctan(-+1)=-+pi/4 */
 248   comparaison = mpfr_cmp_ui (xp, 1);
 249   if (MPFR_UNLIKELY (comparaison == 0))
 250     {
 251       int neg = MPFR_IS_NEG (x);
 252       inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode
 253                                : MPFR_INVERT_RND (rnd_mode));
 254       if (neg)
 255         {
 256           inexact = -inexact;
 257           MPFR_CHANGE_SIGN (atan);
 258         }
 259       inexact2 = mpfr_div_2ui (atan, atan, 2, rnd_mode);
 260       if (MPFR_UNLIKELY (inexact2))
 261         inexact = inexact2; /* an underflow occurs */
 262       return inexact;
 263     }
 264
 265   realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
 266   prec = realprec + BITS_PER_MP_LIMB;
 267
 268   MPFR_SAVE_EXPO_MARK (expo);
 269
 270   /* Initialisation    */
 271   mpz_init (ukz);
 272   MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt);
 273   oldn0 = 0;
 274   tabz = (mpz_t *) 0;
 275
 276   MPFR_ZIV_INIT (loop, prec);
 277   for (;;)
 278     {
 279       /* First, if |x| < 1, we need to have more prec to be able to round (sup)
 280          n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */
 281       mp_prec_t sup;
 282 #if 0
 283       sup = 1;
 284       if (MPFR_GET_EXP (xp) < 0
 285           && (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) > realprec)
 286         sup = (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) - realprec;
 287 #else
 288       sup = MPFR_GET_EXP (xp) < 0 ? 2-MPFR_GET_EXP (xp) : 1;
 289 #endif
 290       n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3);
 291       MPFR_ASSERTD (3*n0 > 2);
 292       prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2);
 293
 294       /* Initialisation */
 295       MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt);
 296       if (MPFR_LIKELY (oldn0 == 0))
 297         {
 298           oldn0 = 3*(n0+1);
 299           tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0*sizeof (mpz_t));
 300           for (i = 0; i < oldn0; i++)
 301             mpz_init (tabz[i]);
 302         }
 303       else if (MPFR_UNLIKELY (oldn0 < 3*n0+1))
 304         {
 305           tabz = (mpz_t *) (*__gmp_reallocate_func)
 306             (tabz, oldn0*sizeof (mpz_t), 3*(n0+1)*sizeof (mpz_t));
 307           for (i = oldn0; i < 3*(n0+1); i++)
 308             mpz_init (tabz[i]);
 309           oldn0 = 3*(n0+1);
 310         }
 311
 312       if (comparaison > 0)
 313         mpfr_ui_div (sk, 1, xp, GMP_RNDN);
 314       else
 315         mpfr_set (sk, xp, GMP_RNDN);
 316
 317       /* sk is 1/|x| if |x| > 1, and |x| otherwise, i.e. min(|x|, 1/|x|) */
 318
 319       /* Assignation  */
 320       MPFR_SET_ZERO (arctgt);
 321       twopoweri = 1<<0;
 322       MPFR_ASSERTD (n0 >= 4);
 323       /* FIXME: further reduce the argument so that it is less than
 324          1/n where n is the output precision. In such a way, the
 325          first calls to mpfr_atan_aux will not be too expensive,
 326          since the number of needed terms will be n/log(n), so the
 327          factorial contribution will be O(n). */
 328       for (i = 0 ; i < n0; i++)
 329         {
 330           if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk)))
 331             break;
 332           /* Calculation of trunc(tmp) --> mpz */
 333           mpfr_mul_2ui (tmp, sk, twopoweri, GMP_RNDN);
 334           mpfr_trunc (tmp, tmp);
 335           if (!MPFR_IS_ZERO (tmp))
 336             {
 337               exptol = mpfr_get_z_exp (ukz, tmp);
 338               /* since the s_k are decreasing (see algorithms.tex),
 339                  and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1,
 340                  thus exptol < 0 */
 341               MPFR_ASSERTD (exptol < 0);
 342               mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol));
 343               /* Calculation of arctan(Ak) */
 344               mpfr_set_z (tmp, ukz, GMP_RNDN);
 345               mpfr_div_2ui (tmp, tmp, twopoweri, GMP_RNDN);
 346               mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz);
 347               mpfr_mul (tmp2, tmp2, tmp, GMP_RNDN);
 348               /* Addition */
 349               mpfr_add (arctgt, arctgt, tmp2, GMP_RNDN);
 350               /* Next iteration */
 351               mpfr_sub (tmp2, sk, tmp, GMP_RNDN);
 352               mpfr_mul (sk, sk, tmp, GMP_RNDN);
 353               mpfr_add_ui (sk, sk, 1, GMP_RNDN);
 354               mpfr_div (sk, tmp2, sk, GMP_RNDN);
 355             }
 356           twopoweri <<= 1;
 357         }
 358       /* Add last step (Arctan(sk) ~= sk */
 359       mpfr_add (arctgt, arctgt, sk, GMP_RNDN);
 360       if (comparaison > 0)
 361         {
 362           mpfr_const_pi (tmp, GMP_RNDN);
 363           mpfr_div_2ui (tmp, tmp, 1, GMP_RNDN);
 364           mpfr_sub (arctgt, tmp, arctgt, GMP_RNDN);
 365         }
 366       MPFR_SET_POS (arctgt);
 367
 368       if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec, MPFR_PREC (atan),
 369                                        rnd_mode)))
 370         break;
 371       MPFR_ZIV_NEXT (loop, realprec);
 372     }
 373   MPFR_ZIV_FREE (loop);
 374
 375   inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x));
 376
 377   for (i = 0 ; i < oldn0 ; i++)
 378     mpz_clear (tabz[i]);
 379   mpz_clear (ukz);
 380   (*__gmp_free_func) (tabz, oldn0*sizeof (mpz_t));
 381   MPFR_GROUP_CLEAR (group);
 382
 383   MPFR_SAVE_EXPO_FREE (expo);
 384   return mpfr_check_range (arctgt, inexact, rnd_mode);
 385 }