contrib/mpfr/src/lngamma.c

   1 /* mpfr_lngamma -- lngamma function
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* given a precision p, return alpha, such that the argument reduction
  27    will use k = alpha*p*log(2).
  28
  29    Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
  30    and the smallest value of alpha multiplied by the smallest working
  31    precision should be >= 4.
  32 */
  33 static void
  34 mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p)
  35 {
  36   if (p <= 100)
  37     mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
  38   else if (p <= 500)
  39     mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
  40   else if (p <= 1000)
  41     mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
  42   else if (p <= 2000)
  43     mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
  44   else if (p <= 5000)
  45     mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
  46   else if (p <= 10000)
  47     mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
  48   else
  49     mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
  50 }
  51
  52 #ifndef IS_GAMMA
  53 static int
  54 unit_bit (mpfr_srcptr (x))
  55 {
  56   mpfr_exp_t expo;
  57   mpfr_prec_t prec;
  58   mp_limb_t x0;
  59
  60   expo = MPFR_GET_EXP (x);
  61   if (expo <= 0)
  62     return 0;  /* |x| < 1 */
  63
  64   prec = MPFR_PREC (x);
  65   if (expo > prec)
  66     return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
  67
  68   /* Now, the unit bit is represented. */
  69
  70   prec = ((prec - 1) / GMP_NUMB_BITS + 1) * GMP_NUMB_BITS - expo;
  71   /* number of represented fractional bits (including the trailing 0's) */
  72
  73   x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
  74   /* limb containing the unit bit */
  75
  76   return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
  77 }
  78 #endif
  79
  80 /* lngamma(x) = log(gamma(x)).
  81    We use formula [6.1.40] from Abramowitz&Stegun:
  82    lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
  83               + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
  84    According to [6.1.42], if the sum is truncated after m=n, the error
  85    R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
  86    where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
  87    For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
  88  */
  89 #ifdef IS_GAMMA
  90 #define GAMMA_FUNC mpfr_gamma_aux
  91 #else
  92 #define GAMMA_FUNC mpfr_lngamma_aux
  93 #endif
  94
  95 static int
  96 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
  97 {
  98   mpfr_prec_t precy, w; /* working precision */
  99   mpfr_t s, t, u, v, z;
 100   unsigned long m, k, maxm;
 101   mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
 102   int inexact, compared;
 103   mpfr_exp_t err_s, err_t;
 104   unsigned long Bm = 0; /* number of allocated B[] */
 105   unsigned long oldBm;
 106   double d;
 107   MPFR_SAVE_EXPO_DECL (expo);
 108
 109   compared = mpfr_cmp_ui (z0, 1);
 110
 111   MPFR_SAVE_EXPO_MARK (expo);
 112
 113 #ifndef IS_GAMMA /* lngamma or lgamma */
 114   if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
 115     {
 116       MPFR_SAVE_EXPO_FREE (expo);
 117       return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
 118     }
 119
 120   /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
 121      - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
 122   if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y))
 123     {
 124       mpfr_t l, h, g;
 125       int ok, inex2;
 126       mpfr_prec_t prec = MPFR_PREC(y) + 14;
 127       MPFR_ZIV_DECL (loop);
 128
 129       MPFR_ZIV_INIT (loop, prec);
 130       do
 131         {
 132           mpfr_init2 (l, prec);
 133           if (MPFR_IS_POS(z0))
 134             {
 135               mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
 136               mpfr_init2 (h, MPFR_PREC(l));
 137             }
 138           else
 139             {
 140               mpfr_init2 (h, MPFR_PREC(z0));
 141               mpfr_neg (h, z0, MPFR_RNDN); /* exact */
 142               mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
 143               mpfr_set_prec (h, MPFR_PREC(l));
 144             }
 145           mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
 146           mpfr_set (h, l, MPFR_RNDD); /* exact */
 147           mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
 148                                  to mpfr_log */
 149           mpfr_init2 (g, MPFR_PREC(l));
 150           /* if z0>0, we need an upper approximation of Euler's constant
 151              for the left bound */
 152           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
 153           mpfr_mul (g, g, z0, MPFR_RNDD);
 154           mpfr_sub (l, l, g, MPFR_RNDD);
 155           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
 156           mpfr_mul (g, g, z0, MPFR_RNDU);
 157           mpfr_sub (h, h, g, MPFR_RNDD);
 158           mpfr_mul (g, z0, z0, MPFR_RNDU);
 159           mpfr_add (h, h, g, MPFR_RNDU);
 160           inexact = mpfr_prec_round (l, MPFR_PREC(y), rnd);
 161           inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
 162           /* Caution: we not only need l = h, but both inexact flags should
 163              agree. Indeed, one of the inexact flags might be zero. In that
 164              case if we assume lngamma(z0) cannot be exact, the other flag
 165              should be correct. We are conservative here and request that both
 166              inexact flags agree. */
 167           ok = SAME_SIGN (inexact, inex2) && mpfr_cmp (l, h) == 0;
 168           if (ok)
 169             mpfr_set (y, h, rnd); /* exact */
 170           mpfr_clear (l);
 171           mpfr_clear (h);
 172           mpfr_clear (g);
 173           if (ok)
 174             {
 175               MPFR_SAVE_EXPO_FREE (expo);
 176               return mpfr_check_range (y, inexact, rnd);
 177             }
 178           /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
 179              if x ~ 2^(-n), then we have a n-bit approximation, thus
 180              we can try again with a working precision of n bits,
 181              especially when n >> PREC(y).
 182              Otherwise we would use the reflection formula evaluating x-1,
 183              which would need precision n. */
 184           MPFR_ZIV_NEXT (loop, prec);
 185         }
 186       while (prec <= -MPFR_EXP(z0));
 187       MPFR_ZIV_FREE (loop);
 188     }
 189 #endif
 190
 191   precy = MPFR_PREC(y);
 192
 193   mpfr_init2 (s, MPFR_PREC_MIN);
 194   mpfr_init2 (t, MPFR_PREC_MIN);
 195   mpfr_init2 (u, MPFR_PREC_MIN);
 196   mpfr_init2 (v, MPFR_PREC_MIN);
 197   mpfr_init2 (z, MPFR_PREC_MIN);
 198
 199   if (compared < 0)
 200     {
 201       mpfr_exp_t err_u;
 202
 203       /* use reflection formula:
 204          gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
 205          thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
 206
 207       w = precy + MPFR_INT_CEIL_LOG2 (precy);
 208       while (1)
 209         {
 210           w += MPFR_INT_CEIL_LOG2 (w) + 14;
 211           MPFR_ASSERTD(w >= 3);
 212           mpfr_set_prec (s, w);
 213           mpfr_set_prec (t, w);
 214           mpfr_set_prec (u, w);
 215           mpfr_set_prec (v, w);
 216           /* In the following, we write r for a real of absolute value
 217              at most 2^(-w). Different instances of r may represent different
 218              values. */
 219           mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
 220           mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
 221           mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
 222           /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
 223              We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
 224              Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
 225              the error on u is bounded by
 226              ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
 227              = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
 228           d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
 229           err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
 230           err_u = (err_u >= 0) ? err_u + 1 : 0;
 231           /* now the error on u is bounded by 2^err_u ulps */
 232
 233           mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
 234           err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
 235           mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
 236           /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
 237              <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
 238              <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
 239           err_s += 3 - MPFR_GET_EXP(s);
 240           err_s = (err_s >= 0) ? err_s + 1 : 0;
 241           /* the error on s is bounded by 2^err_s ulp(s), thus by
 242              2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
 243              Now n*2^(-w) can always be written |(1+r)^n-1| for some
 244              |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
 245              |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
 246              true value.
 247              In fact if ulp(s) <= ulp(S) the same inequality holds for
 248              |S| instead of |s| in the right hand side, i.e., we can
 249              write s = (1+r)^(2^(err_s+1)) * S.
 250              But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
 251              to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
 252              E = n*2^(-w) we have |s - S| <= E * |s|, thus
 253              |s - S| <= E/(1-E) * |S|.
 254              Now E/(1-E) is bounded by 2E as long as E<=1/2,
 255              and 2E can be written (1+r)^(2n)-1 as above.
 256           */
 257           err_s += 2; /* exponent of relative error */
 258
 259           mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
 260           mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
 261           mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
 262           mpfr_abs (v, v, MPFR_RNDN);
 263           /* (1+r)^(3+2^err_s+1) */
 264           err_s = (err_s <= 1) ? 3 : err_s + 1;
 265           /* now (1+r)^M with M <= 2^err_s */
 266           mpfr_log (v, v, MPFR_RNDN);
 267           /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
 268              Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
 269              bounded by ulp(v)/2 + 2^(err_s+1-w). */
 270           if (err_s + 2 > w)
 271             {
 272               w += err_s + 2;
 273             }
 274           else
 275             {
 276               err_s += 1 - MPFR_GET_EXP(v);
 277               err_s = (err_s >= 0) ? err_s + 1 : 0;
 278               /* the error on v is bounded by 2^err_s ulps */
 279               err_u += MPFR_GET_EXP(u); /* absolute error on u */
 280               err_s += MPFR_GET_EXP(v); /* absolute error on v */
 281               mpfr_sub (s, v, u, MPFR_RNDN);
 282               /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
 283                  + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
 284               err_s = (err_s >= err_u) ? err_s : err_u;
 285               err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
 286               err_s = (err_s >= 0) ? err_s + 1 : 0;
 287               if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy
 288                                   + (rnd == MPFR_RNDN)))
 289                 goto end;
 290             }
 291         }
 292     }
 293
 294   /* now z0 > 1 */
 295
 296   MPFR_ASSERTD (compared > 0);
 297
 298   /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
 299      so there is a cancellation of ~log(w) in the argument reconstruction */
 300   w = precy + MPFR_INT_CEIL_LOG2 (precy);
 301
 302   do
 303     {
 304       w += MPFR_INT_CEIL_LOG2 (w) + 13;
 305       MPFR_ASSERTD (w >= 3);
 306
 307       /* argument reduction: we compute gamma(z0 + k), where the series
 308          has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
 309          and we need k steps of argument reconstruction. Assuming k is large
 310          with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
 311          k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
 312          However, since the series is more expensive to compute, the optimal
 313          value seems to be k ~ 4.5 * w experimentally. */
 314       mpfr_set_prec (s, 53);
 315       mpfr_gamma_alpha (s, w);
 316       mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU);
 317       mpfr_mul_ui (s, s, w, MPFR_RNDU);
 318       if (mpfr_cmp (z0, s) < 0)
 319         {
 320           mpfr_sub (s, s, z0, MPFR_RNDU);
 321           k = mpfr_get_ui (s, MPFR_RNDU);
 322           if (k < 3)
 323             k = 3;
 324         }
 325       else
 326         k = 3;
 327
 328       mpfr_set_prec (s, w);
 329       mpfr_set_prec (t, w);
 330       mpfr_set_prec (u, w);
 331       mpfr_set_prec (v, w);
 332       mpfr_set_prec (z, w);
 333
 334       mpfr_add_ui (z, z0, k, MPFR_RNDN);
 335       /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
 336
 337       /* z >= 4 ensures the relative error on log(z) is small,
 338          and also (z-1/2)*log(z)-z >= 0 */
 339       MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
 340
 341       mpfr_log (s, z, MPFR_RNDN); /* log(z) */
 342       /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
 343          Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
 344          = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
 345          s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
 346       mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
 347       mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
 348       /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
 349          t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
 350       mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
 351       mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
 352       mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
 353       /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
 354
 355       mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
 356
 357       /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
 358       mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
 359       mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
 360       mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
 361
 362       mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
 363
 364       if (Bm == 0)
 365         {
 366           B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
 367           B = mpfr_bernoulli_internal (B, 1);
 368           Bm = 2;
 369         }
 370
 371       /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
 372       maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
 373
 374       /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
 375
 376       for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
 377         {
 378           mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
 379           if (m <= maxm)
 380             {
 381               mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
 382               mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
 383               mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
 384             }
 385           else
 386             {
 387               mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
 388               mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
 389               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
 390               mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
 391               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
 392               mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
 393             }
 394           /* (1+u)^(10m-8) */
 395           /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
 396           if (Bm <= m)
 397             {
 398               B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
 399               Bm ++;
 400             }
 401           mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
 402           MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
 403           mpfr_add (s, s, v, MPFR_RNDN);
 404         }
 405       /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
 406       MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
 407
 408       /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
 409          <= 1.46*u for u <= 2^(-3).
 410          We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
 411          for z >= 4, thus since the initial s >= 0.85, the different values of
 412          s differ by at most one binade, and the total rounding error on s
 413          in the for-loop is bounded by 2*(m-1)*ulp(final_s).
 414          The error coming from the v's is bounded by
 415          1.46*2^(-w) <= 2*ulp(final_s).
 416          Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
 417          <= (2m+47)*ulp(s).
 418          Taking into account the truncation error (which is bounded by the last
 419          term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
 420       */
 421
 422       /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
 423       mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
 424       mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
 425       if (k)
 426         {
 427           unsigned long l;
 428           mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
 429           for (l = 1; l < k; l++)
 430             {
 431               mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
 432               mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
 433             }
 434           /* now t: (1+u)^(2k-1) */
 435           /* instead of computing log(sqrt(2*Pi)/t), we compute
 436              1/2*log(2*Pi/t^2), which trades a square root for a square */
 437           mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
 438           mpfr_div (v, v, t, MPFR_RNDN);
 439           /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
 440         }
 441 #ifdef IS_GAMMA
 442       err_s = MPFR_GET_EXP(s);
 443       mpfr_exp (s, s, MPFR_RNDN);
 444       /* before the exponential, we have s = s0 + h where
 445          |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
 446          For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
 447          |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
 448       d = 1.2 * (2.0 * (double) m + 48.0);
 449       /* the error on s is bounded by d*2^err_s * 2^(-w) */
 450       mpfr_sqrt (t, v, MPFR_RNDN);
 451       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 452          thus t = sqrt(v0)*(1+u)^(2k+3/2). */
 453       mpfr_mul (s, s, t, MPFR_RNDN);
 454       /* the error on input s is bounded by (1+u)^(d*2^err_s),
 455          and that on t is (1+u)^(2k+3/2), thus the
 456          total error is (1+u)^(d*2^err_s+2k+5/2) */
 457       err_s += __gmpfr_ceil_log2 (d);
 458       err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
 459       err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
 460 #else
 461       mpfr_log (t, v, MPFR_RNDN);
 462       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 463          thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
 464          for |u| <= 2^(-3), the absolute error on log(v) is bounded by
 465          1.07*(4k+1)*u, and the rounding error by ulp(t). */
 466       mpfr_div_2ui (t, t, 1, MPFR_RNDN);
 467       /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
 468          We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
 469          since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
 470          Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
 471       err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
 472         __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
 473       err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
 474         __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
 475       mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
 476       /* the final error in ulp(s) is
 477          <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
 478          <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
 479          <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
 480       err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
 481       err_s += 1 - MPFR_GET_EXP(s);
 482 #endif
 483     }
 484   while (MPFR_UNLIKELY (!MPFR_CAN_ROUND (s, w - err_s, precy, rnd)));
 485
 486   oldBm = Bm;
 487   while (Bm--)
 488     mpz_clear (B[Bm]);
 489   (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
 490
 491  end:
 492   inexact = mpfr_set (y, s, rnd);
 493
 494   mpfr_clear (s);
 495   mpfr_clear (t);
 496   mpfr_clear (u);
 497   mpfr_clear (v);
 498   mpfr_clear (z);
 499
 500   MPFR_SAVE_EXPO_FREE (expo);
 501   return mpfr_check_range (y, inexact, rnd);
 502 }
 503
 504 #ifndef IS_GAMMA
 505
 506 int
 507 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
 508 {
 509   int inex;
 510
 511   MPFR_LOG_FUNC
 512     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
 513      ("y[%Pu]=%.*Rg inexact=%d",
 514       mpfr_get_prec (y), mpfr_log_prec, y, inex));
 515
 516   /* special cases */
 517   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 518     {
 519       if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x))
 520         {
 521           MPFR_SET_NAN (y);
 522           MPFR_RET_NAN;
 523         }
 524       else /* lngamma(+Inf) = lngamma(+0) = +Inf */
 525         {
 526           if (MPFR_IS_ZERO (x))
 527             mpfr_set_divby0 ();
 528           MPFR_SET_INF (y);
 529           MPFR_SET_POS (y);
 530           MPFR_RET (0);  /* exact */
 531         }
 532     }
 533
 534   /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */
 535   if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x)))
 536     {
 537       MPFR_SET_NAN (y);
 538       MPFR_RET_NAN;
 539     }
 540
 541   inex = mpfr_lngamma_aux (y, x, rnd);
 542   return inex;
 543 }
 544
 545 int
 546 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
 547 {
 548   int inex;
 549
 550   MPFR_LOG_FUNC
 551     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
 552      ("y[%Pu]=%.*Rg signp=%d inexact=%d",
 553       mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
 554
 555   *signp = 1;  /* most common case */
 556
 557   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 558     {
 559       if (MPFR_IS_NAN (x))
 560         {
 561           MPFR_SET_NAN (y);
 562           MPFR_RET_NAN;
 563         }
 564       else
 565         {
 566           if (MPFR_IS_ZERO (x))
 567             mpfr_set_divby0 ();
 568           *signp = MPFR_INT_SIGN (x);
 569           MPFR_SET_INF (y);
 570           MPFR_SET_POS (y);
 571           MPFR_RET (0);
 572         }
 573     }
 574
 575   if (MPFR_IS_NEG (x))
 576     {
 577       if (mpfr_integer_p (x))
 578         {
 579           MPFR_SET_INF (y);
 580           MPFR_SET_POS (y);
 581           mpfr_set_divby0 ();
 582           MPFR_RET (0);
 583         }
 584
 585       if (unit_bit (x) == 0)
 586         *signp = -1;
 587
 588       /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
 589          thus |gamma(x)| = -1/x + euler + O(x), and
 590          log |gamma(x)| = -log(-x) - euler*x + O(x^2).
 591          More precisely we have for -0.4 <= x < 0:
 592          -log(-x) <= log |gamma(x)| <= -log(-x) - x.
 593          Since log(x) is not representable, we may have an instance of the
 594          Table Maker Dilemma. The only way to ensure correct rounding is to
 595          compute an interval [l,h] such that l <= -log(-x) and
 596          -log(-x) - x <= h, and check whether l and h round to the same number
 597          for the target precision and rounding modes. */
 598       if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
 599         /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
 600            thus |x| <= 0.25 < 0.4 */
 601         {
 602           mpfr_t l, h;
 603           int ok, inex2;
 604           mpfr_prec_t w = MPFR_PREC (y) + 14;
 605           mpfr_exp_t expl;
 606
 607           while (1)
 608             {
 609               mpfr_init2 (l, w);
 610               mpfr_init2 (h, w);
 611               /* we want a lower bound on -log(-x), thus an upper bound
 612                  on log(-x), thus an upper bound on -x. */
 613               mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
 614               mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
 615               mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
 616               mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
 617               mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
 618               mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
 619               mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
 620               inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
 621               inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
 622               /* Caution: we not only need l = h, but both inexact flags
 623                  should agree. Indeed, one of the inexact flags might be
 624                  zero. In that case if we assume ln|gamma(x)| cannot be
 625                  exact, the other flag should be correct. We are conservative
 626                  here and request that both inexact flags agree. */
 627               ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
 628               if (ok)
 629                 mpfr_set (y, h, rnd); /* exact */
 630               else
 631                 expl = MPFR_EXP (l);
 632               mpfr_clear (l);
 633               mpfr_clear (h);
 634               if (ok)
 635                 return inex;
 636               /* if ulp(log(-x)) <= |x| there is no reason to loop,
 637                  since the width of [l, h] will be at least |x| */
 638               if (expl < MPFR_EXP(x) + (mpfr_exp_t) w)
 639                 break;
 640               w += MPFR_INT_CEIL_LOG2(w) + 3;
 641             }
 642         }
 643     }
 644
 645   inex = mpfr_lngamma_aux (y, x, rnd);
 646   return inex;
 647 }
 648
 649 #endif