contrib/gmp/gmp-impl.h

   1 /* Include file for internal GNU MP types and definitions.
   2
   3    THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   4    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
   5
   6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
   7 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU Lesser General Public License as published by
  13 the Free Software Foundation; either version 3 of the License, or (at your
  14 option) any later version.
  15
  16 The GNU MP Library is distributed in the hope that it will be useful, but
  17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  19 License for more details.
  20
  21 You should have received a copy of the GNU Lesser General Public License
  22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  23
  24
  25 /* __GMP_DECLSPEC must be given on any global data that will be accessed
  26    from outside libgmp, meaning from the test or development programs, or
  27    from libgmpxx.  Failing to do this will result in an incorrect address
  28    being used for the accesses.  On functions __GMP_DECLSPEC makes calls
  29    from outside libgmp more efficient, but they'll still work fine without
  30    it.  */
  31
  32
  33 #ifndef __GMP_IMPL_H__
  34 #define __GMP_IMPL_H__
  35
  36 #if defined _CRAY
  37 #include <intrinsics.h>  /* for _popcnt */
  38 #endif
  39
  40 /* limits.h is not used in general, since it's an ANSI-ism, and since on
  41    solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
  42    values (the ABI=64 values).
  43
  44    On Cray vector systems, however, we need the system limits.h since sizes
  45    of signed and unsigned types can differ there, depending on compiler
  46    options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
  47    reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
  48    short can be 24, 32, 46 or 64 bits, and different for ushort.  */
  49
  50 #if defined _CRAY
  51 #include <limits.h>
  52 #endif
  53
  54 /* For fat.h and other fat binary stuff.
  55    No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
  56    declared this way are only used to set function pointers in __gmp_cpuvec,
  57    they're not called directly.  */
  58 #define DECL_add_n(name) \
  59   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
  60 #define DECL_addmul_1(name) \
  61   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  62 #define DECL_copyd(name) \
  63   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  64 #define DECL_copyi(name) \
  65   DECL_copyd (name)
  66 #define DECL_divexact_1(name) \
  67   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  68 #define DECL_divexact_by3c(name) \
  69   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  70 #define DECL_divrem_1(name) \
  71   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
  72 #define DECL_gcd_1(name) \
  73   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  74 #define DECL_lshift(name) \
  75   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
  76 #define DECL_mod_1(name) \
  77   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  78 #define DECL_mod_34lsub1(name) \
  79   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
  80 #define DECL_modexact_1c_odd(name) \
  81   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  82 #define DECL_mul_1(name) \
  83   DECL_addmul_1 (name)
  84 #define DECL_mul_basecase(name) \
  85   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
  86 #define DECL_preinv_divrem_1(name) \
  87   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
  88 #define DECL_preinv_mod_1(name) \
  89   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  90 #define DECL_rshift(name) \
  91   DECL_lshift (name)
  92 #define DECL_sqr_basecase(name) \
  93   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  94 #define DECL_sub_n(name) \
  95   DECL_add_n (name)
  96 #define DECL_submul_1(name) \
  97   DECL_addmul_1 (name)
  98
  99 #if ! __GMP_WITHIN_CONFIGURE
 100 #include "config.h"
 101 #include "gmp-mparam.h"
 102 #include "fib_table.h"
 103 #include "mp_bases.h"
 104 #if WANT_FAT_BINARY
 105 #include "fat.h"
 106 #endif
 107 #endif
 108
 109 #if HAVE_INTTYPES_H      /* for uint_least32_t */
 110 # include <inttypes.h>
 111 #else
 112 # if HAVE_STDINT_H
 113 #  include <stdint.h>
 114 # endif
 115 #endif
 116
 117 #ifdef __cplusplus
 118 #include <cstring>  /* for strlen */
 119 #include <string>   /* for std::string */
 120 #endif
 121
 122
 123 #ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
 124 #define WANT_TMP_DEBUG 0
 125 #endif
 126
 127 /* The following tries to get a good version of alloca.  The tests are
 128    adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
 129    Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
 130    be setup appropriately.
 131
 132    ifndef alloca - a cpp define might already exist.
 133        glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
 134        HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
 135
 136    GCC __builtin_alloca - preferred whenever available.
 137
 138    _AIX pragma - IBM compilers need a #pragma in "each module that needs to
 139        use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
 140        used in past versions of GMP, retained still in case it matters.
 141
 142        The autoconf manual says this pragma needs to be at the start of a C
 143        file, apart from comments and preprocessor directives.  Is that true?
 144        xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
 145        from gmp.h.
 146 */
 147
 148 #ifndef alloca
 149 # ifdef __GNUC__
 150 #  define alloca __builtin_alloca
 151 # else
 152 #  ifdef __DECC
 153 #   define alloca(x) __ALLOCA(x)
 154 #  else
 155 #   ifdef _MSC_VER
 156 #    include <malloc.h>
 157 #    define alloca _alloca
 158 #   else
 159 #    if HAVE_ALLOCA_H
 160 #     include <alloca.h>
 161 #    else
 162 #     if defined (_AIX) || defined (_IBMR2)
 163  #pragma alloca
 164 #     else
 165        char *alloca ();
 166 #     endif
 167 #    endif
 168 #   endif
 169 #  endif
 170 # endif
 171 #endif
 172
 173
 174 /* if not provided by gmp-mparam.h */
 175 #ifndef BYTES_PER_MP_LIMB
 176 #define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
 177 #endif
 178 #ifndef BITS_PER_MP_LIMB
 179 #define BITS_PER_MP_LIMB  (8 * SIZEOF_MP_LIMB_T)
 180 #endif
 181
 182 #define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
 183
 184
 185 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
 186 #if HAVE_UINT_LEAST32_T
 187 typedef uint_least32_t      gmp_uint_least32_t;
 188 #else
 189 #if SIZEOF_UNSIGNED_SHORT >= 4
 190 typedef unsigned short      gmp_uint_least32_t;
 191 #else
 192 #if SIZEOF_UNSIGNED >= 4
 193 typedef unsigned            gmp_uint_least32_t;
 194 #else
 195 typedef unsigned long       gmp_uint_least32_t;
 196 #endif
 197 #endif
 198 #endif
 199
 200
 201 /* const and signed must match __gmp_const and __gmp_signed, so follow the
 202    decision made for those in gmp.h.    */
 203 #if ! __GMP_HAVE_CONST
 204 #define const   /* empty */
 205 #define signed  /* empty */
 206 #endif
 207
 208 /* "const" basically means a function does nothing but examine its arguments
 209    and give a return value, it doesn't read or write any memory (neither
 210    global nor pointed to by arguments), and has no other side-effects.  This
 211    is more restrictive than "pure".  See info node "(gcc)Function
 212    Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
 213    this off when trying to write timing loops.  */
 214 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
 215 #define ATTRIBUTE_CONST  __attribute__ ((const))
 216 #else
 217 #define ATTRIBUTE_CONST
 218 #endif
 219
 220 #if HAVE_ATTRIBUTE_NORETURN
 221 #define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
 222 #else
 223 #define ATTRIBUTE_NORETURN
 224 #endif
 225
 226 /* "malloc" means a function behaves like malloc in that the pointer it
 227    returns doesn't alias anything.  */
 228 #if HAVE_ATTRIBUTE_MALLOC
 229 #define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
 230 #else
 231 #define ATTRIBUTE_MALLOC
 232 #endif
 233
 234
 235 #if ! HAVE_STRCHR
 236 #define strchr(s,c)  index(s,c)
 237 #endif
 238
 239 #if ! HAVE_MEMSET
 240 #define memset(p, c, n)                 \
 241   do {                                  \
 242     ASSERT ((n) >= 0);                  \
 243     char *__memset__p = (p);            \
 244     int  __i;                           \
 245     for (__i = 0; __i < (n); __i++)     \
 246       __memset__p[__i] = (c);           \
 247   } while (0)
 248 #endif
 249
 250 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
 251    mode.  Falling back to a memcpy will give maximum portability, since it
 252    works no matter whether va_list is a pointer, struct or array.  */
 253 #if ! defined (va_copy) && defined (__va_copy)
 254 #define va_copy(dst,src)  __va_copy(dst,src)
 255 #endif
 256 #if ! defined (va_copy)
 257 #define va_copy(dst,src) \
 258   do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
 259 #endif
 260
 261
 262 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
 263    (ie. ctlz, ctpop, cttz).  */
 264 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
 265   || HAVE_HOST_CPU_alphaev7
 266 #define HAVE_HOST_CPU_alpha_CIX 1
 267 #endif
 268
 269
 270 #if defined (__cplusplus)
 271 extern "C" {
 272 #endif
 273
 274
 275 /* Usage: TMP_DECL;
 276           TMP_MARK;
 277           ptr = TMP_ALLOC (bytes);
 278           TMP_FREE;
 279
 280    Small allocations should use TMP_SALLOC, big allocations should use
 281    TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
 282
 283    Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
 284    TMP_SFREE.
 285
 286    TMP_DECL just declares a variable, but might be empty and so must be last
 287    in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
 288    TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
 289    TMP_MARK was made, but then no TMP_ALLOCs.  */
 290
 291 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
 292    __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
 293    isn't used for "double"s, so that's not in the union.  */
 294 union tmp_align_t {
 295   mp_limb_t  l;
 296   char       *p;
 297 };
 298 #define __TMP_ALIGN  sizeof (union tmp_align_t)
 299
 300 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
 301    "a" must be an unsigned type.
 302    This is designed for use with a compile-time constant "m".
 303    The POW2 case is expected to be usual, and gcc 3.0 and up recognises
 304    "(-(8*n))%8" or the like is always zero, which means the rounding up in
 305    the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
 306 #define ROUND_UP_MULTIPLE(a,m)          \
 307   (POW2_P(m) ? (a) + (-(a))%(m)         \
 308    : (a)+(m)-1 - (((a)+(m)-1) % (m)))
 309
 310 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
 311 struct tmp_reentrant_t {
 312   struct tmp_reentrant_t  *next;
 313   size_t                  size;   /* bytes, including header */
 314 };
 315 void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
 316 void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
 317 #endif
 318
 319 #if WANT_TMP_ALLOCA
 320 #define TMP_SDECL
 321 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 322 #define TMP_SMARK
 323 #define TMP_MARK                __tmp_marker = 0
 324 #define TMP_SALLOC(n)           alloca(n)
 325 #define TMP_BALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 326 #define TMP_ALLOC(n)                                                    \
 327   (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
 328 #define TMP_SFREE
 329 #define TMP_FREE                                                           \
 330   do {                                                                     \
 331     if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
 332   } while (0)
 333 #endif
 334
 335 #if WANT_TMP_REENTRANT
 336 #define TMP_SDECL               TMP_DECL
 337 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 338 #define TMP_SMARK               TMP_MARK
 339 #define TMP_MARK                __tmp_marker = 0
 340 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 341 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 342 #define TMP_ALLOC(n)            __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 343 #define TMP_SFREE               TMP_FREE
 344 #define TMP_FREE                __gmp_tmp_reentrant_free (__tmp_marker)
 345 #endif
 346
 347 #if WANT_TMP_NOTREENTRANT
 348 struct tmp_marker
 349 {
 350   struct tmp_stack *which_chunk;
 351   void *alloc_point;
 352 };
 353 void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
 354 void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
 355 void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
 356 #define TMP_SDECL               TMP_DECL
 357 #define TMP_DECL                struct tmp_marker __tmp_marker
 358 #define TMP_SMARK               TMP_MARK
 359 #define TMP_MARK                __gmp_tmp_mark (&__tmp_marker)
 360 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 361 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 362 #define TMP_ALLOC(n)                                                    \
 363   __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
 364 #define TMP_SFREE               TMP_FREE
 365 #define TMP_FREE                __gmp_tmp_free (&__tmp_marker)
 366 #endif
 367
 368 #if WANT_TMP_DEBUG
 369 /* See tal-debug.c for some comments. */
 370 struct tmp_debug_t {
 371   struct tmp_debug_entry_t  *list;
 372   const char                *file;
 373   int                       line;
 374 };
 375 struct tmp_debug_entry_t {
 376   struct tmp_debug_entry_t  *next;
 377   char                      *block;
 378   size_t                    size;
 379 };
 380 void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
 381                                      struct tmp_debug_t *,
 382                                      const char *, const char *));
 383 void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
 384                                      struct tmp_debug_t **, const char *,
 385                                      size_t)) ATTRIBUTE_MALLOC;
 386 void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
 387                                      struct tmp_debug_t **,
 388                                      const char *, const char *));
 389 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 390 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 391 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 392 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 393 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 394 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 395 /* The marker variable is designed to provoke an uninitialized variable
 396    warning from the compiler if TMP_FREE is used without a TMP_MARK.
 397    __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
 398    these things up too.  */
 399 #define TMP_DECL_NAME(marker, marker_name)                      \
 400   int marker;                                                   \
 401   int __tmp_marker_inscope;                                     \
 402   const char *__tmp_marker_name = marker_name;                  \
 403   struct tmp_debug_t  __tmp_marker_struct;                      \
 404   /* don't demand NULL, just cast a zero */                     \
 405   struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
 406 #define TMP_MARK_NAME(marker, marker_name)                      \
 407   do {                                                          \
 408     marker = 1;                                                 \
 409     __tmp_marker_inscope = 1;                                   \
 410     __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
 411                            &__tmp_marker, &__tmp_marker_struct, \
 412                            __tmp_marker_name, marker_name);     \
 413   } while (0)
 414 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 415 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 416 #define TMP_ALLOC(size)                                                 \
 417   __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
 418                          __tmp_marker_inscope,                          \
 419                          &__tmp_marker, __tmp_marker_name, size)
 420 #define TMP_FREE_NAME(marker, marker_name)                      \
 421   do {                                                          \
 422     __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
 423                            marker, &__tmp_marker,               \
 424                            __tmp_marker_name, marker_name);     \
 425   } while (0)
 426 #endif /* WANT_TMP_DEBUG */
 427
 428
 429 /* Allocating various types. */
 430 #define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
 431 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
 432 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
 433 #define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
 434 #define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
 435 #define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
 436 #define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
 437 #define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
 438 #define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
 439
 440 /* It's more efficient to allocate one block than two.  This is certainly
 441    true of the malloc methods, but it can even be true of alloca if that
 442    involves copying a chunk of stack (various RISCs), or a call to a stack
 443    bounds check (mingw).  In any case, when debugging keep separate blocks
 444    so a redzoning malloc debugger can protect each individually.  */
 445 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
 446   do {                                                  \
 447     if (WANT_TMP_DEBUG)                                 \
 448       {                                                 \
 449         (xp) = TMP_ALLOC_LIMBS (xsize);                 \
 450         (yp) = TMP_ALLOC_LIMBS (ysize);                 \
 451       }                                                 \
 452     else                                                \
 453       {                                                 \
 454         (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
 455         (yp) = (xp) + (xsize);                          \
 456       }                                                 \
 457   } while (0)
 458
 459
 460 /* From gmp.h, nicer names for internal use. */
 461 #define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
 462 #define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
 463 #define LIKELY(cond)                   __GMP_LIKELY(cond)
 464 #define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
 465
 466 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 467 #undef MIN
 468 #define MIN(l,o) ((l) < (o) ? (l) : (o))
 469 #undef MAX
 470 #define MAX(h,i) ((h) > (i) ? (h) : (i))
 471 #define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
 472
 473 /* Field access macros.  */
 474 #define SIZ(x) ((x)->_mp_size)
 475 #define ABSIZ(x) ABS (SIZ (x))
 476 #define PTR(x) ((x)->_mp_d)
 477 #define LIMBS(x) ((x)->_mp_d)
 478 #define EXP(x) ((x)->_mp_exp)
 479 #define PREC(x) ((x)->_mp_prec)
 480 #define ALLOC(x) ((x)->_mp_alloc)
 481
 482 /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
 483    then that lowest one bit must have been the only bit set.  n==0 will
 484    return true though, so avoid that.  */
 485 #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
 486
 487
 488 /* The "short" defines are a bit different because shorts are promoted to
 489    ints by ~ or >> etc.
 490
 491    #ifndef's are used since on some systems (HP?) header files other than
 492    limits.h setup these defines.  We could forcibly #undef in that case, but
 493    there seems no need to worry about that.  */
 494
 495 #ifndef ULONG_MAX
 496 #define ULONG_MAX   __GMP_ULONG_MAX
 497 #endif
 498 #ifndef UINT_MAX
 499 #define UINT_MAX    __GMP_UINT_MAX
 500 #endif
 501 #ifndef USHRT_MAX
 502 #define USHRT_MAX   __GMP_USHRT_MAX
 503 #endif
 504 #define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
 505
 506 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
 507    unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
 508    treats the plain decimal values in <limits.h> as signed.  */
 509 #define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
 510 #define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
 511 #define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
 512 #define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
 513
 514 #ifndef LONG_MIN
 515 #define LONG_MIN           ((long) ULONG_HIGHBIT)
 516 #endif
 517 #ifndef LONG_MAX
 518 #define LONG_MAX           (-(LONG_MIN+1))
 519 #endif
 520
 521 #ifndef INT_MIN
 522 #define INT_MIN            ((int) UINT_HIGHBIT)
 523 #endif
 524 #ifndef INT_MAX
 525 #define INT_MAX            (-(INT_MIN+1))
 526 #endif
 527
 528 #ifndef SHRT_MIN
 529 #define SHRT_MIN           ((short) USHRT_HIGHBIT)
 530 #endif
 531 #ifndef SHRT_MAX
 532 #define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
 533 #endif
 534
 535 #if __GMP_MP_SIZE_T_INT
 536 #define MP_SIZE_T_MAX      INT_MAX
 537 #define MP_SIZE_T_MIN      INT_MIN
 538 #else
 539 #define MP_SIZE_T_MAX      LONG_MAX
 540 #define MP_SIZE_T_MIN      LONG_MIN
 541 #endif
 542
 543 /* mp_exp_t is the same as mp_size_t */
 544 #define MP_EXP_T_MAX   MP_SIZE_T_MAX
 545 #define MP_EXP_T_MIN   MP_SIZE_T_MIN
 546
 547 #define LONG_HIGHBIT       LONG_MIN
 548 #define INT_HIGHBIT        INT_MIN
 549 #define SHRT_HIGHBIT       SHRT_MIN
 550
 551
 552 #define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
 553
 554 #if GMP_NAIL_BITS == 0
 555 #define GMP_NAIL_LOWBIT   CNST_LIMB(0)
 556 #else
 557 #define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
 558 #endif
 559
 560 #if GMP_NAIL_BITS != 0
 561 /* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
 562    code that has not yet been qualified.  */
 563
 564 #undef DIV_SB_PREINV_THRESHOLD
 565 #undef DIV_DC_THRESHOLD
 566 #undef POWM_THRESHOLD
 567 #define DIV_SB_PREINV_THRESHOLD           MP_SIZE_T_MAX
 568 #define DIV_DC_THRESHOLD                 50
 569 #define POWM_THRESHOLD                    0
 570
 571 #undef GCD_ACCEL_THRESHOLD
 572 #define GCD_ACCEL_THRESHOLD               3
 573
 574 #undef DIVREM_1_NORM_THRESHOLD
 575 #undef DIVREM_1_UNNORM_THRESHOLD
 576 #undef MOD_1_NORM_THRESHOLD
 577 #undef MOD_1_UNNORM_THRESHOLD
 578 #undef USE_PREINV_DIVREM_1
 579 #undef USE_PREINV_MOD_1
 580 #undef DIVREM_2_THRESHOLD
 581 #undef DIVEXACT_1_THRESHOLD
 582 #undef MODEXACT_1_ODD_THRESHOLD
 583 #define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
 584 #define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
 585 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
 586 #define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
 587 #define USE_PREINV_DIVREM_1               0  /* no preinv */
 588 #define USE_PREINV_MOD_1                  0  /* no preinv */
 589 #define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
 590
 591 /* mpn/generic/mul_fft.c is not nails-capable. */
 592 #undef  MUL_FFT_THRESHOLD
 593 #undef  SQR_FFT_THRESHOLD
 594 #define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
 595 #define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
 596 #endif
 597
 598 /* Swap macros. */
 599
 600 #define MP_LIMB_T_SWAP(x, y)                    \
 601   do {                                          \
 602     mp_limb_t __mp_limb_t_swap__tmp = (x);      \
 603     (x) = (y);                                  \
 604     (y) = __mp_limb_t_swap__tmp;                \
 605   } while (0)
 606 #define MP_SIZE_T_SWAP(x, y)                    \
 607   do {                                          \
 608     mp_size_t __mp_size_t_swap__tmp = (x);      \
 609     (x) = (y);                                  \
 610     (y) = __mp_size_t_swap__tmp;                \
 611   } while (0)
 612
 613 #define MP_PTR_SWAP(x, y)               \
 614   do {                                  \
 615     mp_ptr __mp_ptr_swap__tmp = (x);    \
 616     (x) = (y);                          \
 617     (y) = __mp_ptr_swap__tmp;           \
 618   } while (0)
 619 #define MP_SRCPTR_SWAP(x, y)                    \
 620   do {                                          \
 621     mp_srcptr __mp_srcptr_swap__tmp = (x);      \
 622     (x) = (y);                                  \
 623     (y) = __mp_srcptr_swap__tmp;                \
 624   } while (0)
 625
 626 #define MPN_PTR_SWAP(xp,xs, yp,ys)      \
 627   do {                                  \
 628     MP_PTR_SWAP (xp, yp);               \
 629     MP_SIZE_T_SWAP (xs, ys);            \
 630   } while(0)
 631 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
 632   do {                                  \
 633     MP_SRCPTR_SWAP (xp, yp);            \
 634     MP_SIZE_T_SWAP (xs, ys);            \
 635   } while(0)
 636
 637 #define MPZ_PTR_SWAP(x, y)              \
 638   do {                                  \
 639     mpz_ptr __mpz_ptr_swap__tmp = (x);  \
 640     (x) = (y);                          \
 641     (y) = __mpz_ptr_swap__tmp;          \
 642   } while (0)
 643 #define MPZ_SRCPTR_SWAP(x, y)                   \
 644   do {                                          \
 645     mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
 646     (x) = (y);                                  \
 647     (y) = __mpz_srcptr_swap__tmp;               \
 648   } while (0)
 649
 650
 651 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
 652    but current gcc (3.0) doesn't seem to support that.  */
 653 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
 654 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
 655 __GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
 656
 657 void *__gmp_default_allocate __GMP_PROTO ((size_t));
 658 void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
 659 void __gmp_default_free __GMP_PROTO ((void *, size_t));
 660
 661 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
 662   ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
 663 #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
 664
 665 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
 666   ((type *) (*__gmp_reallocate_func)                            \
 667    (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
 668 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
 669   __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
 670
 671 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
 672 #define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
 673
 674 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
 675   do {                                                          \
 676     if ((oldsize) != (newsize))                                 \
 677       (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
 678   } while (0)
 679
 680 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
 681   do {                                                                  \
 682     if ((oldsize) != (newsize))                                         \
 683       (ptr) = (type *) (*__gmp_reallocate_func)                         \
 684         (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
 685   } while (0)
 686
 687
 688 /* Dummy for non-gcc, code involving it will go dead. */
 689 #if ! defined (__GNUC__) || __GNUC__ < 2
 690 #define __builtin_constant_p(x)   0
 691 #endif
 692
 693
 694 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
 695    stack usage is compatible.  __attribute__ ((regparm (N))) helps by
 696    putting leading parameters in registers, avoiding extra stack.
 697
 698    regparm cannot be used with calls going through the PLT, because the
 699    binding code there may clobber the registers (%eax, %edx, %ecx) used for
 700    the regparm parameters.  Calls to local (ie. static) functions could
 701    still use this, if we cared to differentiate locals and globals.
 702
 703    On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
 704    -p or -pg profiling, since that version of gcc doesn't realize the
 705    .mcount calls will clobber the parameter registers.  Other systems are
 706    ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
 707    bother to try to detect this.  regparm is only an optimization so we just
 708    disable it when profiling (profiling being a slowdown anyway).  */
 709
 710 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
 711   && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
 712 #define USE_LEADING_REGPARM 1
 713 #else
 714 #define USE_LEADING_REGPARM 0
 715 #endif
 716
 717 /* Macros for altering parameter order according to regparm usage. */
 718 #if USE_LEADING_REGPARM
 719 #define REGPARM_2_1(a,b,x)    x,a,b
 720 #define REGPARM_3_1(a,b,c,x)  x,a,b,c
 721 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
 722 #else
 723 #define REGPARM_2_1(a,b,x)    a,b,x
 724 #define REGPARM_3_1(a,b,c,x)  a,b,c,x
 725 #define REGPARM_ATTR(n)
 726 #endif
 727
 728
 729 /* ASM_L gives a local label for a gcc asm block, for use when temporary
 730    local labels like "1:" might not be available, which is the case for
 731    instance on the x86s (the SCO assembler doesn't support them).
 732
 733    The label generated is made unique by including "%=" which is a unique
 734    number for each insn.  This ensures the same name can be used in multiple
 735    asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
 736    allowed there's no need for a label to be usable outside a single
 737    block.  */
 738
 739 #define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
 740
 741
 742 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
 743 #if 0
 744 /* FIXME: Check that these actually improve things.
 745    FIXME: Need a cld after each std.
 746    FIXME: Can't have inputs in clobbered registers, must describe them as
 747    dummy outputs, and add volatile. */
 748 #define MPN_COPY_INCR(DST, SRC, N)                                      \
 749   __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
 750            "D" (DST), "S" (SRC), "c" (N) :                              \
 751            "cx", "di", "si", "memory")
 752 #define MPN_COPY_DECR(DST, SRC, N)                                      \
 753   __asm__ ("std\n\trep\n\tmovsl" : :                                    \
 754            "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
 755            "cx", "di", "si", "memory")
 756 #endif
 757 #endif
 758
 759
 760 void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
 761 #define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
 762
 763 #define mpz_n_pow_ui __gmpz_n_pow_ui
 764 void    mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
 765
 766
 767 #define mpn_addmul_1c __MPN(addmul_1c)
 768 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 769
 770 #define mpn_addmul_2 __MPN(addmul_2)
 771 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 772
 773 #define mpn_addmul_3 __MPN(addmul_3)
 774 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 775
 776 #define mpn_addmul_4 __MPN(addmul_4)
 777 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 778
 779 #define mpn_addmul_5 __MPN(addmul_5)
 780 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 781
 782 #define mpn_addmul_6 __MPN(addmul_6)
 783 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 784
 785 #define mpn_addmul_7 __MPN(addmul_7)
 786 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 787
 788 #define mpn_addmul_8 __MPN(addmul_8)
 789 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 790
 791 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
 792    returns the carry out (0, 1 or 2).  */
 793 #define mpn_addlsh1_n __MPN(addlsh1_n)
 794 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 795
 796 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
 797    returns the borrow out (0, 1 or 2).  */
 798 #define mpn_sublsh1_n __MPN(sublsh1_n)
 799 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 800
 801 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
 802    and returns the bit rshifted out (0 or 1).  */
 803 #define mpn_rsh1add_n __MPN(rsh1add_n)
 804 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 805
 806 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
 807    and returns the bit rshifted out (0 or 1).  If there's a borrow from the
 808    subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
 809    complement negative.  */
 810 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
 811 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 812
 813 #define mpn_lshiftc __MPN(lshiftc)
 814 __GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
 815
 816 #define mpn_addsub_n __MPN(addsub_n)
 817 __GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 818
 819 #define mpn_addsub_nc __MPN(addsub_nc)
 820 __GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 821
 822 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
 823 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 824
 825 #define mpn_divrem_1c __MPN(divrem_1c)
 826 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 827
 828 #define mpn_dump __MPN(dump)
 829 __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
 830
 831 #define mpn_fib2_ui __MPN(fib2_ui)
 832 mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
 833
 834 /* Remap names of internal mpn functions.  */
 835 #define __clz_tab               __MPN(clz_tab)
 836 #define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)
 837
 838 #define mpn_jacobi_base __MPN(jacobi_base)
 839 int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
 840
 841 #define mpn_mod_1c __MPN(mod_1c)
 842 __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
 843
 844 #define mpn_mul_1c __MPN(mul_1c)
 845 __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 846
 847 #define mpn_mul_2 __MPN(mul_2)
 848 mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 849
 850 #define mpn_mul_3 __MPN(mul_3)
 851 __GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 852
 853 #define mpn_mul_4 __MPN(mul_4)
 854 __GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 855
 856 #ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
 857 #define mpn_mul_basecase __MPN(mul_basecase)
 858 __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
 859 #endif
 860
 861 #define mpn_mullow_n __MPN(mullow_n)
 862 __GMP_DECLSPEC void mpn_mullow_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 863
 864 #define mpn_mullow_basecase __MPN(mullow_basecase)
 865 __GMP_DECLSPEC void mpn_mullow_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 866
 867 #define mpn_sqr_n __MPN(sqr)    /* compatibility */
 868
 869 #ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
 870 #define mpn_sqr_basecase __MPN(sqr_basecase)
 871 __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
 872 #endif
 873
 874 #define mpn_submul_1c __MPN(submul_1c)
 875 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 876
 877 #define mpn_invert_2exp __MPN(invert_2exp)
 878 __GMP_DECLSPEC void mpn_invert_2exp __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 879
 880 #define mpn_redc_1 __MPN(redc_1)
 881 __GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);)
 882
 883 #define mpn_redc_2 __MPN(redc_2)
 884 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 885
 886
 887 #define mpn_mod_1s_1p_cps __MPN(mod_1s_1p_cps)
 888 __GMP_DECLSPEC void mpn_mod_1s_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
 889 #define mpn_mod_1s_1p __MPN(mod_1s_1p)
 890 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]));
 891
 892 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
 893 __GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
 894 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
 895 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]));
 896
 897 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
 898 __GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
 899 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
 900 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]));
 901
 902 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
 903 __GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
 904 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
 905 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]));
 906
 907
 908 typedef __gmp_randstate_struct *gmp_randstate_ptr;
 909 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
 910
 911 /* Pseudo-random number generator function pointers structure.  */
 912 typedef struct {
 913   void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
 914   void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
 915   void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
 916   void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
 917 } gmp_randfnptr_t;
 918
 919 /* Macro to obtain a void pointer to the function pointers structure.  */
 920 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
 921
 922 /* Macro to obtain a pointer to the generator's state.
 923    When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
 924 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
 925
 926 /* Write a given number of random bits to rp.  */
 927 #define _gmp_rand(rp, state, bits)                              \
 928   do {                                                          \
 929     gmp_randstate_ptr  __rstate = (state);                      \
 930     (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
 931        (__rstate, rp, bits);                                    \
 932   } while (0)
 933
 934 __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
 935
 936
 937 /* __gmp_rands is the global state for the old-style random functions, and
 938    is also used in the test programs (hence the __GMP_DECLSPEC).
 939
 940    There's no seeding here, so mpz_random etc will generate the same
 941    sequence every time.  This is not unlike the C library random functions
 942    if you don't seed them, so perhaps it's acceptable.  Digging up a seed
 943    from /dev/random or the like would work on many systems, but might
 944    encourage a false confidence, since it'd be pretty much impossible to do
 945    something that would work reliably everywhere.  In any case the new style
 946    functions are recommended to applications which care about randomness, so
 947    the old functions aren't too important.  */
 948
 949 __GMP_DECLSPEC extern char             __gmp_rands_initialized;
 950 __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
 951
 952 #define RANDS                                       \
 953   ((__gmp_rands_initialized ? 0                     \
 954     : (__gmp_rands_initialized = 1,                 \
 955        __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
 956    __gmp_rands)
 957
 958 /* this is used by the test programs, to free memory */
 959 #define RANDS_CLEAR()                   \
 960   do {                                  \
 961     if (__gmp_rands_initialized)        \
 962       {                                 \
 963         __gmp_rands_initialized = 0;    \
 964         gmp_randclear (__gmp_rands);    \
 965       }                                 \
 966   } while (0)
 967
 968
 969 /* FIXME: Make these itch functions less conservative.  Also consider making
 970    them dependent on just 'an', and compute the allocation directly from 'an'
 971    instead of via n.  */
 972 static inline mp_size_t
 973 mpn_toom22_mul_itch (mp_size_t an, mp_size_t bn)
 974 {
 975   mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
 976   return 4 * n + 2;
 977 }
 978
 979 static inline mp_size_t
 980 mpn_toom33_mul_itch (mp_size_t an, mp_size_t bn)
 981 {
 982   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
 983      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
 984   mp_size_t n = (an + 2) / (size_t) 3;
 985   return 6 * n + GMP_NUMB_BITS;
 986 }
 987
 988 static inline mp_size_t
 989 mpn_toom44_mul_itch (mp_size_t an, mp_size_t bn)
 990 {
 991   mp_size_t n = (an + 3) >> 2;
 992   return 12 * n + GMP_NUMB_BITS;
 993 }
 994
 995 static inline mp_size_t
 996 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
 997 {
 998   mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
 999   return 4 * n + 2;
1000 }
1001
1002 static inline mp_size_t
1003 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
1004 {
1005   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
1006      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
1007   mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
1008   return 6 * n + 3;
1009 }
1010
1011 static inline mp_size_t
1012 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
1013 {
1014   mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
1015   return 10 * n + 10;
1016 }
1017
1018 static inline mp_size_t
1019 mpn_toom2_sqr_itch (mp_size_t an)
1020 {
1021   mp_size_t n = 1 + ((an - 1) >> 1);
1022   return 4 * n + 2;
1023 }
1024
1025 static inline mp_size_t
1026 mpn_toom3_sqr_itch (mp_size_t an)
1027 {
1028   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
1029      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
1030   mp_size_t n = (an + 2) / (size_t) 3;
1031   return 6 * n + GMP_NUMB_BITS;
1032 }
1033
1034 static inline mp_size_t
1035 mpn_toom4_sqr_itch (mp_size_t an)
1036 {
1037   mp_size_t n = (an + 3) >> 2;
1038   return 12 * n + GMP_NUMB_BITS;
1039 }
1040
1041
1042 /* kara uses n+1 limbs of temporary space and then recurses with the balance,
1043    so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  This can be solved to
1044    2n + o(n).  Since n is very limited, o(n) in practice could be around 15.
1045    For now, assume n is arbitrarily large.  */
1046 #define MPN_KARA_MUL_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
1047 #define MPN_KARA_SQR_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
1048
1049 /* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
1050    unavailable, but just 2n + o(n) if mpn_sublsh1_n is available.  It is hard
1051    to pin down the value of o(n), since it is a complex function of
1052    MUL_TOOM3_THRESHOLD and n.  Normally toom3 is used between kara and fft; in
1053    that case o(n) will be really limited.  If toom3 is used for arbitrarily
1054    large operands, o(n) will be larger.  These definitions handle operands of
1055    up to 8956264246117233 limbs.  A single multiplication using toom3 on the
1056    fastest hardware currently (2008) would need 10 million years, which
1057    suggests that these limits are acceptable.  */
1058 #if WANT_FFT
1059 #if HAVE_NATIVE_mpn_sublsh1_n
1060 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 63)
1061 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 63)
1062 #else
1063 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
1064 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
1065 #endif
1066 #else /* WANT_FFT */
1067 #if HAVE_NATIVE_mpn_sublsh1_n
1068 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 255)
1069 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 255)
1070 #else
1071 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
1072 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
1073 #endif
1074 #define MPN_TOOM44_MAX_N 285405
1075 #endif /* WANT_FFT */
1076
1077 /* need 2 so that n2>=1 */
1078 #define MPN_KARA_MUL_N_MINSIZE    2
1079 #define MPN_KARA_SQR_N_MINSIZE    2
1080
1081 /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
1082 #define MPN_TOOM3_MUL_N_MINSIZE   17
1083 #define MPN_TOOM3_SQR_N_MINSIZE   17
1084
1085 #define MPN_TOOM44_MUL_N_MINSIZE  30    /* ??? */
1086 #define MPN_TOOM4_SQR_N_MINSIZE   30    /* ??? */
1087
1088 #define   mpn_sqr_diagonal __MPN(sqr_diagonal)
1089 void      mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1090
1091 #define   mpn_kara_mul_n __MPN(kara_mul_n)
1092 void      mpn_kara_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
1093
1094 #define   mpn_kara_sqr_n __MPN(kara_sqr_n)
1095 void      mpn_kara_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1096
1097 #define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1098 void      mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t, mp_ptr));
1099
1100 enum toom4_flags { toom4_w1_neg = 1, toom4_w3_neg = 2 }; /* FIXME */
1101 #define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1102 void      mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom4_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
1103
1104 #define   mpn_toom3_mul_n __MPN(toom3_mul_n)
1105 void      mpn_toom3_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
1106
1107 #define   mpn_toom3_sqr_n __MPN(toom3_sqr_n)
1108 void      mpn_toom3_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1109
1110 #define   mpn_toom22_mul __MPN(toom22_mul)
1111 void      mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1112
1113 #define   mpn_toom2_sqr __MPN(toom2_sqr)
1114 void      mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1115
1116 #define   mpn_toom33_mul __MPN(toom33_mul)
1117 void      mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1118
1119 #define   mpn_toom3_sqr __MPN(toom3_sqr)
1120 void      mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1121
1122 #define   mpn_toom44_mul __MPN(toom44_mul)
1123 void      mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1124
1125 #define   mpn_toom32_mul __MPN(toom32_mul)
1126 void      mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1127
1128 #define   mpn_toom42_mul __MPN(toom42_mul)
1129 void      mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1130
1131 #define   mpn_toom53_mul __MPN(toom53_mul)
1132 void      mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1133
1134 #define   mpn_toom62_mul __MPN(toom62_mul)
1135 void      mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1136
1137 #define   mpn_toom4_sqr __MPN(toom4_sqr)
1138 void      mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1139
1140 #define   mpn_fft_best_k __MPN(fft_best_k)
1141 int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1142
1143 #define   mpn_mul_fft __MPN(mul_fft)
1144 mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
1145
1146 #define   mpn_mul_fft_full __MPN(mul_fft_full)
1147 void      mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1148
1149 #define   mpn_fft_next_size __MPN(fft_next_size)
1150 mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1151
1152 #define   mpn_sb_divrem_mn __MPN(sb_divrem_mn)
1153 mp_limb_t mpn_sb_divrem_mn __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1154
1155 #define   mpn_dc_divrem_n __MPN(dc_divrem_n)
1156 mp_limb_t mpn_dc_divrem_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
1157
1158 #define   mpn_sb_div_qr __MPN(sb_div_qr)
1159 mp_limb_t mpn_sb_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1160 #define   mpn_sb_div_q __MPN(sb_div_q)
1161 mp_limb_t mpn_sb_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1162 #define   mpn_sb_divappr_q __MPN(sb_divappr_q)
1163 mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1164 #define   mpn_dc_div_qr __MPN(dc_div_qr)
1165 mp_limb_t mpn_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1166 #define   mpn_dc_div_qr_n __MPN(dc_div_qr_n)
1167 mp_limb_t mpn_dc_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1168 #define   mpn_dc_div_q __MPN(dc_div_q)
1169 mp_limb_t mpn_dc_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1170 #define   mpn_preinv_dc_div_qr __MPN(preinv_dc_div_qr)
1171 mp_limb_t mpn_preinv_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1172 #define   mpn_dc_divappr_q __MPN(dc_divappr_q)
1173 mp_limb_t mpn_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1174 #define   mpn_dc_divappr_q_n __MPN(dc_divappr_q_n)
1175 mp_limb_t mpn_dc_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1176 #define   mpn_preinv_dc_divappr_q __MPN(preinv_dc_divappr_q)
1177 mp_limb_t mpn_preinv_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1178 #define   mpn_mu_div_qr __MPN(mu_div_qr)
1179 mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1180 #define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1181 mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1182 #define   mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
1183 mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1184 #define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1185 void      mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1186 #define   mpn_mu_divappr_q __MPN(mu_divappr_q)
1187 mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1188 #define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1189 mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1190 #define   mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
1191 mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1192 #define   mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
1193 void      mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1194 #define   mpn_mu_div_q __MPN(mu_div_q)
1195 mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1196 #define   mpn_invert __MPN(invert)
1197 void      mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1198 #define   mpn_invert_itch __MPN(invert_itch)
1199 mp_size_t mpn_invert_itch __GMP_PROTO ((mp_size_t));
1200
1201 #define   mpn_binvert __MPN(binvert)
1202 void      mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1203 #define   mpn_binvert_itch __MPN(binvert_itch)
1204 mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
1205 #define   mpn_sb_bdiv_qr __MPN(sb_bdiv_qr)
1206 mp_limb_t mpn_sb_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1207 #define   mpn_sb_bdiv_q __MPN(sb_bdiv_q)
1208 void      mpn_sb_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1209 #define   mpn_dc_bdiv_qr __MPN(dc_bdiv_qr)
1210 mp_limb_t mpn_dc_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1211 #define   mpn_dc_bdiv_qr_n_itch __MPN(dc_bdiv_qr_n_itch)
1212 mp_size_t mpn_dc_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
1213 #define   mpn_dc_bdiv_qr_n __MPN(dc_bdiv_qr_n)
1214 mp_limb_t mpn_dc_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1215 #define   mpn_dc_bdiv_q __MPN(dc_bdiv_q)
1216 void      mpn_dc_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1217 #define   mpn_dc_bdiv_q_n_itch __MPN(dc_bdiv_q_n_itch)
1218 mp_size_t mpn_dc_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
1219 #define   mpn_dc_bdiv_q_n __MPN(dc_bdiv_q_n)
1220 void      mpn_dc_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1221 #define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1222 void      mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1223 #define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1224 mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1225 #define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1226 void      mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1227 #define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1228 mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1229
1230 #define   mpn_divexact __MPN(divexact)
1231 void      mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1232 #define   mpn_divexact_itch __MPN(divexact_itch)
1233 mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1234
1235
1236 #define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1237 mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
1238 #define   mpn_bdiv_dbm1(dst, src, size, divisor) \
1239   mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1240
1241 #define   mpn_powm __MPN(powm)
1242 void      mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1243 #define   mpn_powlo __MPN(powlo)
1244 void      mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1245
1246 #define   mpn_powm_sec __MPN(powm_sec)
1247 void      mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1248 #define   mpn_subcnd_n __MPN(subcnd_n)
1249 mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
1250 #define   mpn_tabselect __MPN(tabselect)
1251 void      mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
1252
1253 #ifndef DIVEXACT_BY3_METHOD
1254 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1255 #define DIVEXACT_BY3_METHOD 0   /* default to using mpn_bdiv_dbm1c */
1256 #else
1257 #define DIVEXACT_BY3_METHOD 1
1258 #endif
1259 #endif
1260
1261 #if DIVEXACT_BY3_METHOD == 0
1262 #undef mpn_divexact_by3
1263 #define mpn_divexact_by3(dst,src,size) \
1264   (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1265 /* override mpn_divexact_by3c defined in gmp.h */
1266 /*
1267 #undef mpn_divexact_by3c
1268 #define mpn_divexact_by3c(dst,src,size,cy) \
1269   (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
1270 */
1271 #endif
1272
1273 #if GMP_NUMB_BITS % 4 == 0
1274 #define mpn_divexact_by5(dst,src,size) \
1275   (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1276 #endif
1277
1278 #if GMP_NUMB_BITS % 6 == 0
1279 #define mpn_divexact_by7(dst,src,size) \
1280   (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1281 #endif
1282
1283 #if GMP_NUMB_BITS % 6 == 0
1284 #define mpn_divexact_by9(dst,src,size) \
1285   (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1286 #endif
1287
1288 #if GMP_NUMB_BITS % 10 == 0
1289 #define mpn_divexact_by11(dst,src,size) \
1290   (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1291 #endif
1292
1293 #if GMP_NUMB_BITS % 12 == 0
1294 #define mpn_divexact_by13(dst,src,size) \
1295   (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1296 #endif
1297
1298 #if GMP_NUMB_BITS % 4 == 0
1299 #define mpn_divexact_by15(dst,src,size) \
1300   (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1301 #endif
1302
1303 #define mpz_divexact_gcd  __gmpz_divexact_gcd
1304 void    mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
1305
1306 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1307 #ifdef _GMP_H_HAVE_FILE
1308 size_t  mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
1309 #endif
1310
1311 #define mpn_divisible_p __MPN(divisible_p)
1312 int     mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
1313
1314 #define   mpn_rootrem __MPN(rootrem)
1315 mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1316
1317
1318 #if defined (_CRAY)
1319 #define MPN_COPY_INCR(dst, src, n)                                      \
1320   do {                                                                  \
1321     int __i;            /* Faster on some Crays with plain int */       \
1322     _Pragma ("_CRI ivdep");                                             \
1323     for (__i = 0; __i < (n); __i++)                                     \
1324       (dst)[__i] = (src)[__i];                                          \
1325   } while (0)
1326 #endif
1327
1328 /* used by test programs, hence __GMP_DECLSPEC */
1329 #ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
1330 #define mpn_copyi __MPN(copyi)
1331 __GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1332 #endif
1333
1334 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1335 #define MPN_COPY_INCR(dst, src, size)                   \
1336   do {                                                  \
1337     ASSERT ((size) >= 0);                               \
1338     ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
1339     mpn_copyi (dst, src, size);                         \
1340   } while (0)
1341 #endif
1342
1343 /* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
1344 #if ! defined (MPN_COPY_INCR)
1345 #define MPN_COPY_INCR(dst, src, n)                      \
1346   do {                                                  \
1347     ASSERT ((n) >= 0);                                  \
1348     ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
1349     if ((n) != 0)                                       \
1350       {                                                 \
1351         mp_size_t __n = (n) - 1;                        \
1352         mp_ptr __dst = (dst);                           \
1353         mp_srcptr __src = (src);                        \
1354         mp_limb_t __x;                                  \
1355         __x = *__src++;                                 \
1356         if (__n != 0)                                   \
1357           {                                             \
1358             do                                          \
1359               {                                         \
1360                 *__dst++ = __x;                         \
1361                 __x = *__src++;                         \
1362               }                                         \
1363             while (--__n);                              \
1364           }                                             \
1365         *__dst++ = __x;                                 \
1366       }                                                 \
1367   } while (0)
1368 #endif
1369
1370
1371 #if defined (_CRAY)
1372 #define MPN_COPY_DECR(dst, src, n)                                      \
1373   do {                                                                  \
1374     int __i;            /* Faster on some Crays with plain int */       \
1375     _Pragma ("_CRI ivdep");                                             \
1376     for (__i = (n) - 1; __i >= 0; __i--)                                \
1377       (dst)[__i] = (src)[__i];                                          \
1378   } while (0)
1379 #endif
1380
1381 /* used by test programs, hence __GMP_DECLSPEC */
1382 #ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
1383 #define mpn_copyd __MPN(copyd)
1384 __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1385 #endif
1386
1387 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1388 #define MPN_COPY_DECR(dst, src, size)                   \
1389   do {                                                  \
1390     ASSERT ((size) >= 0);                               \
1391     ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
1392     mpn_copyd (dst, src, size);                         \
1393   } while (0)
1394 #endif
1395
1396 /* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
1397 #if ! defined (MPN_COPY_DECR)
1398 #define MPN_COPY_DECR(dst, src, n)                      \
1399   do {                                                  \
1400     ASSERT ((n) >= 0);                                  \
1401     ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
1402     if ((n) != 0)                                       \
1403       {                                                 \
1404         mp_size_t __n = (n) - 1;                        \
1405         mp_ptr __dst = (dst) + __n;                     \
1406         mp_srcptr __src = (src) + __n;                  \
1407         mp_limb_t __x;                                  \
1408         __x = *__src--;                                 \
1409         if (__n != 0)                                   \
1410           {                                             \
1411             do                                          \
1412               {                                         \
1413                 *__dst-- = __x;                         \
1414                 __x = *__src--;                         \
1415               }                                         \
1416             while (--__n);                              \
1417           }                                             \
1418         *__dst-- = __x;                                 \
1419       }                                                 \
1420   } while (0)
1421 #endif
1422
1423
1424 #ifndef MPN_COPY
1425 #define MPN_COPY(d,s,n)                         \
1426   do {                                          \
1427     ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
1428     MPN_COPY_INCR (d, s, n);                    \
1429   } while (0)
1430 #endif
1431
1432
1433 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1434 #define MPN_REVERSE(dst, src, size)                     \
1435   do {                                                  \
1436     mp_ptr     __dst = (dst);                           \
1437     mp_size_t  __size = (size);                         \
1438     mp_srcptr  __src = (src) + __size - 1;              \
1439     mp_size_t  __i;                                     \
1440     ASSERT ((size) >= 0);                               \
1441     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
1442     CRAY_Pragma ("_CRI ivdep");                         \
1443     for (__i = 0; __i < __size; __i++)                  \
1444       {                                                 \
1445         *__dst = *__src;                                \
1446         __dst++;                                        \
1447         __src--;                                        \
1448       }                                                 \
1449   } while (0)
1450
1451
1452 /* Zero n limbs at dst.
1453
1454    For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
1455    ppc630 for instance this is optimal since it can sustain only 1 store per
1456    cycle.
1457
1458    gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1459    "for" loop in the generic code below can become stu/bdnz.  The do/while
1460    here helps it get to that.  The same caveat about plain -mpowerpc64 mode
1461    applies here as to __GMPN_COPY_INCR in gmp.h.
1462
1463    xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1464    this loop too.
1465
1466    Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1467    at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
1468    trouble than it's worth to do the same, though perhaps a call to memset
1469    would be good when on a GNU system.  */
1470
1471 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1472 #define MPN_ZERO(dst, n)                        \
1473   do {                                          \
1474     ASSERT ((n) >= 0);                          \
1475     if ((n) != 0)                               \
1476       {                                         \
1477         mp_ptr __dst = (dst) - 1;               \
1478         mp_size_t __n = (n);                    \
1479         do                                      \
1480           *++__dst = 0;                         \
1481         while (--__n);                          \
1482       }                                         \
1483   } while (0)
1484 #endif
1485
1486 #ifndef MPN_ZERO
1487 #define MPN_ZERO(dst, n)                        \
1488   do {                                          \
1489     ASSERT ((n) >= 0);                          \
1490     if ((n) != 0)                               \
1491       {                                         \
1492         mp_ptr __dst = (dst);                   \
1493         mp_size_t __n = (n);                    \
1494         do                                      \
1495           *__dst++ = 0;                         \
1496         while (--__n);                          \
1497       }                                         \
1498   } while (0)
1499 #endif
1500
1501
1502 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1503    start up and would need to strip a lot of zeros before it'd be faster
1504    than a simple cmpl loop.  Here are some times in cycles for
1505    std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1506    low zeros).
1507
1508                 std   cld
1509            P5    18    16
1510            P6    46    38
1511            K6    36    13
1512            K7    21    20
1513 */
1514 #ifndef MPN_NORMALIZE
1515 #define MPN_NORMALIZE(DST, NLIMBS) \
1516   do {                                                                  \
1517     while ((NLIMBS) > 0)                                                \
1518       {                                                                 \
1519         if ((DST)[(NLIMBS) - 1] != 0)                                   \
1520           break;                                                        \
1521         (NLIMBS)--;                                                     \
1522       }                                                                 \
1523   } while (0)
1524 #endif
1525 #ifndef MPN_NORMALIZE_NOT_ZERO
1526 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
1527   do {                                          \
1528     ASSERT ((NLIMBS) >= 1);                     \
1529     while (1)                                   \
1530       {                                         \
1531         if ((DST)[(NLIMBS) - 1] != 0)           \
1532           break;                                \
1533         (NLIMBS)--;                             \
1534       }                                         \
1535   } while (0)
1536 #endif
1537
1538 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1539    and decrementing size.  low should be ptr[0], and will be the new ptr[0]
1540    on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
1541    somewhere a non-zero limb.  */
1542 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
1543   do {                                                  \
1544     ASSERT ((size) >= 1);                               \
1545     ASSERT ((low) == (ptr)[0]);                         \
1546                                                         \
1547     while ((low) == 0)                                  \
1548       {                                                 \
1549         (size)--;                                       \
1550         ASSERT ((size) >= 1);                           \
1551         (ptr)++;                                        \
1552         (low) = *(ptr);                                 \
1553       }                                                 \
1554   } while (0)
1555
1556 /* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
1557    temporary variable; it will be automatically cleared out at function
1558    return.  We use __x here to make it possible to accept both mpz_ptr and
1559    mpz_t arguments.  */
1560 #define MPZ_TMP_INIT(X, NLIMBS)                                         \
1561   do {                                                                  \
1562     mpz_ptr __x = (X);                                                  \
1563     ASSERT ((NLIMBS) >= 1);                                             \
1564     __x->_mp_alloc = (NLIMBS);                                          \
1565     __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
1566   } while (0)
1567
1568 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
1569 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
1570                           ? (mp_ptr) _mpz_realloc(z,n)  \
1571                           : PTR(z))
1572
1573 #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
1574
1575
1576 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1577    f1p.
1578
1579    From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1580    nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
1581    number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1582
1583    The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1584    without good floating point.  There's +2 for rounding up, and a further
1585    +2 since at the last step x limbs are doubled into a 2x+1 limb region
1586    whereas the actual F[2k] value might be only 2x-1 limbs.
1587
1588    Note that a division is done first, since on a 32-bit system it's at
1589    least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
1590    about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1591    whatever a multiply of two 190Mbyte numbers takes.)
1592
1593    Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1594    worked into the multiplier.  */
1595
1596 #define MPN_FIB2_SIZE(n) \
1597   ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
1598
1599
1600 /* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
1601    -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1602
1603    FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1604    F[n] + 2*F[n-1] fits in a limb.  */
1605
1606 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1607 #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
1608
1609
1610 /* For a threshold between algorithms A and B, size>=thresh is where B
1611    should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
1612    value 0 means only ever use B.  The tests for these special values will
1613    be compile-time constants, so the compiler should be able to eliminate
1614    the code for the unwanted algorithm.  */
1615
1616 #define ABOVE_THRESHOLD(size,thresh)    \
1617   ((thresh) == 0                        \
1618    || ((thresh) != MP_SIZE_T_MAX        \
1619        && (size) >= (thresh)))
1620 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
1621
1622 /* Usage: int  use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
1623           ...
1624           if (CACHED_BELOW_THRESHOLD (use_foo, size, FOO_THRESHOLD))
1625
1626    When "use_foo" is a constant (thresh is 0 or MP_SIZE_T), gcc prior to
1627    version 3.3 doesn't optimize away a test "if (use_foo)" when within a
1628    loop.  CACHED_BELOW_THRESHOLD helps it do so.  */
1629
1630 #define CACHED_ABOVE_THRESHOLD(cache, thresh)           \
1631   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1632    ? ABOVE_THRESHOLD (0, thresh)                        \
1633    : (cache))
1634 #define CACHED_BELOW_THRESHOLD(cache, thresh)           \
1635   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1636    ? BELOW_THRESHOLD (0, thresh)                        \
1637    : (cache))
1638
1639
1640 /* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
1641    value which is good on most machines.  */
1642 #ifndef MUL_KARATSUBA_THRESHOLD
1643 #define MUL_KARATSUBA_THRESHOLD 32
1644 #endif
1645
1646 /* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
1647    value which is good on most machines.  */
1648 #ifndef MUL_TOOM3_THRESHOLD
1649 #define MUL_TOOM3_THRESHOLD 128
1650 #endif
1651
1652 #ifndef MUL_TOOM44_THRESHOLD
1653 #define MUL_TOOM44_THRESHOLD 500
1654 #endif
1655
1656 /* Source compatibility while source is in flux.  */
1657 #define MUL_TOOM22_THRESHOLD MUL_KARATSUBA_THRESHOLD
1658 #define MUL_TOOM33_THRESHOLD MUL_TOOM3_THRESHOLD
1659 #define SQR_TOOM2_THRESHOLD SQR_KARATSUBA_THRESHOLD
1660
1661 /* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum for MUL_KARATSUBA_THRESHOLD.
1662    In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.
1663    In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a
1664    variable and a separate hard limit will have been defined.  Similarly for
1665    TOOM3.  */
1666 #ifndef MUL_KARATSUBA_THRESHOLD_LIMIT
1667 #define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD
1668 #endif
1669 #ifndef MUL_TOOM3_THRESHOLD_LIMIT
1670 #define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD
1671 #endif
1672 #ifndef MULLOW_BASECASE_THRESHOLD_LIMIT
1673 #define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD
1674 #endif
1675
1676 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1677    mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
1678    always.  (Note that we certainly always want it if there's a native
1679    assembler mpn_sqr_basecase.)
1680
1681    If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
1682    before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
1683    karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
1684    more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
1685    which mpn_sqr_basecase should be used, and that may be never.  */
1686
1687 #ifndef SQR_BASECASE_THRESHOLD
1688 #define SQR_BASECASE_THRESHOLD 0
1689 #endif
1690
1691 #ifndef SQR_KARATSUBA_THRESHOLD
1692 #define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
1693 #endif
1694
1695 #ifndef SQR_TOOM3_THRESHOLD
1696 #define SQR_TOOM3_THRESHOLD 128
1697 #endif
1698
1699 #ifndef SQR_TOOM4_THRESHOLD
1700 #define SQR_TOOM4_THRESHOLD 500
1701 #endif
1702
1703 /* See comments above about MUL_TOOM3_THRESHOLD_LIMIT.  */
1704 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1705 #define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
1706 #endif
1707
1708 #ifndef DC_DIV_QR_THRESHOLD
1709 #define DC_DIV_QR_THRESHOLD       43
1710 #endif
1711
1712 #ifndef DC_DIVAPPR_Q_THRESHOLD
1713 #define DC_DIVAPPR_Q_THRESHOLD   208
1714 #endif
1715
1716 #ifndef DC_DIV_Q_THRESHOLD
1717 #define DC_DIV_Q_THRESHOLD       228
1718 #endif
1719
1720 #ifndef DC_BDIV_QR_THRESHOLD
1721 #define DC_BDIV_QR_THRESHOLD      52
1722 #endif
1723
1724 #ifndef DC_BDIV_Q_THRESHOLD
1725 #define DC_BDIV_Q_THRESHOLD      224
1726 #endif
1727
1728 #ifndef DIVEXACT_JEB_THRESHOLD
1729 #define DIVEXACT_JEB_THRESHOLD    25
1730 #endif
1731
1732 #ifndef INV_NEWTON_THRESHOLD
1733 #define INV_NEWTON_THRESHOLD     654
1734 #endif
1735
1736 #ifndef BINV_NEWTON_THRESHOLD
1737 #define BINV_NEWTON_THRESHOLD    807
1738 #endif
1739
1740 #ifndef MU_DIVAPPR_Q_THRESHOLD
1741 #define MU_DIVAPPR_Q_THRESHOLD  4000
1742 #endif
1743
1744 #ifndef MU_DIV_Q_THRESHOLD
1745 #define MU_DIV_Q_THRESHOLD      4000
1746 #endif
1747
1748 #ifndef MU_BDIV_Q_THRESHOLD
1749 #define MU_BDIV_Q_THRESHOLD     2000
1750 #endif
1751
1752 /* First k to use for an FFT modF multiply.  A modF FFT is an order
1753    log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
1754    whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
1755 #define FFT_FIRST_K  4
1756
1757 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1758 #ifndef MUL_FFT_MODF_THRESHOLD
1759 #define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
1760 #endif
1761 #ifndef SQR_FFT_MODF_THRESHOLD
1762 #define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
1763 #endif
1764
1765 /* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
1766    will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1767    NxN->2N multiply and not recursing into itself is an order
1768    log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
1769    is the first better than toom3.  */
1770 #ifndef MUL_FFT_THRESHOLD
1771 #define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
1772 #endif
1773 #ifndef SQR_FFT_THRESHOLD
1774 #define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
1775 #endif
1776
1777 /* Table of thresholds for successive modF FFT "k"s.  The first entry is
1778    where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1779    etc.  See mpn_fft_best_k(). */
1780 #ifndef MUL_FFT_TABLE
1781 #define MUL_FFT_TABLE                           \
1782   { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1783     MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1784     MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1785     MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1786     MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1787     MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
1788     0 }
1789 #endif
1790 #ifndef SQR_FFT_TABLE
1791 #define SQR_FFT_TABLE                           \
1792   { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1793     SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1794     SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1795     SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1796     SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1797     SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
1798     0 }
1799 #endif
1800
1801 #ifndef FFT_TABLE_ATTRS
1802 #define FFT_TABLE_ATTRS   static const
1803 #endif
1804
1805 #define MPN_FFT_TABLE_SIZE  16
1806
1807
1808 /* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
1809    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
1810    i.e. n/2 >= MUL_KARATSUBA_THRESHOLD
1811
1812    Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
1813    for 3 as an average.  */
1814
1815 #ifndef DIV_DC_THRESHOLD
1816 #define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
1817 #endif
1818
1819 #ifndef GET_STR_DC_THRESHOLD
1820 #define GET_STR_DC_THRESHOLD             18
1821 #endif
1822
1823 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1824 #define GET_STR_PRECOMPUTE_THRESHOLD     35
1825 #endif
1826
1827 #ifndef SET_STR_DC_THRESHOLD
1828 #define SET_STR_DC_THRESHOLD            750
1829 #endif
1830
1831 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
1832 #define SET_STR_PRECOMPUTE_THRESHOLD   2000
1833 #endif
1834
1835 /* Return non-zero if xp,xsize and yp,ysize overlap.
1836    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1837    overlap.  If both these are false, there's an overlap. */
1838 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1839   ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1840 #define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
1841   (   (char *) (xp) + (xsize) > (char *) (yp)   \
1842    && (char *) (yp) + (ysize) > (char *) (xp))
1843
1844 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1845    overlapping.  Return zero if they're partially overlapping. */
1846 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
1847   MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1848 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \
1849   ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1850
1851 /* Return non-zero if dst,dsize and src,ssize are either identical or
1852    overlapping in a way suitable for an incrementing/decrementing algorithm.
1853    Return zero if they're partially overlapping in an unsuitable fashion. */
1854 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
1855   ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1856 #define MPN_SAME_OR_INCR_P(dst, src, size)      \
1857   MPN_SAME_OR_INCR2_P(dst, size, src, size)
1858 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
1859   ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1860 #define MPN_SAME_OR_DECR_P(dst, src, size)      \
1861   MPN_SAME_OR_DECR2_P(dst, size, src, size)
1862
1863
1864 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1865    ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1866    does it always.  Generally assertions are meant for development, but
1867    might help when looking for a problem later too.
1868
1869    Note that strings shouldn't be used within the ASSERT expression,
1870    eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1871    used in the !HAVE_STRINGIZE case (ie. K&R).  */
1872
1873 #ifdef __LINE__
1874 #define ASSERT_LINE  __LINE__
1875 #else
1876 #define ASSERT_LINE  -1
1877 #endif
1878
1879 #ifdef __FILE__
1880 #define ASSERT_FILE  __FILE__
1881 #else
1882 #define ASSERT_FILE  ""
1883 #endif
1884
1885 void __gmp_assert_header __GMP_PROTO ((const char *, int));
1886 __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
1887
1888 #if HAVE_STRINGIZE
1889 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1890 #else
1891 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
1892 #endif
1893
1894 #define ASSERT_ALWAYS(expr)     \
1895   do {                          \
1896     if (!(expr))                \
1897       ASSERT_FAIL (expr);       \
1898   } while (0)
1899
1900 #if WANT_ASSERT
1901 #define ASSERT(expr)   ASSERT_ALWAYS (expr)
1902 #else
1903 #define ASSERT(expr)   do {} while (0)
1904 #endif
1905
1906
1907 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
1908    that it's zero.  In both cases if assertion checking is disabled the
1909    expression is still evaluated.  These macros are meant for use with
1910    routines like mpn_add_n() where the return value represents a carry or
1911    whatever that should or shouldn't occur in some context.  For example,
1912    ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
1913 #if WANT_ASSERT
1914 #define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
1915 #define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
1916 #else
1917 #define ASSERT_CARRY(expr)     (expr)
1918 #define ASSERT_NOCARRY(expr)   (expr)
1919 #endif
1920
1921
1922 /* ASSERT_CODE includes code when assertion checking is wanted.  This is the
1923    same as writing "#if WANT_ASSERT", but more compact.  */
1924 #if WANT_ASSERT
1925 #define ASSERT_CODE(expr)  expr
1926 #else
1927 #define ASSERT_CODE(expr)
1928 #endif
1929
1930
1931 /* Test that an mpq_t is in fully canonical form.  This can be used as
1932    protection on routines like mpq_equal which give wrong results on
1933    non-canonical inputs.  */
1934 #if WANT_ASSERT
1935 #define ASSERT_MPQ_CANONICAL(q)                         \
1936   do {                                                  \
1937     ASSERT (q->_mp_den._mp_size > 0);                   \
1938     if (q->_mp_num._mp_size == 0)                       \
1939       {                                                 \
1940         /* zero should be 0/1 */                        \
1941         ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
1942       }                                                 \
1943     else                                                \
1944       {                                                 \
1945         /* no common factors */                         \
1946         mpz_t  __g;                                     \
1947         mpz_init (__g);                                 \
1948         mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
1949         ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
1950         mpz_clear (__g);                                \
1951       }                                                 \
1952   } while (0)
1953 #else
1954 #define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
1955 #endif
1956
1957 /* Check that the nail parts are zero. */
1958 #define ASSERT_ALWAYS_LIMB(limb)                \
1959   do {                                          \
1960     mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
1961     ASSERT_ALWAYS (__nail == 0);                \
1962   } while (0)
1963 #define ASSERT_ALWAYS_MPN(ptr, size)            \
1964   do {                                          \
1965     /* let whole loop go dead when no nails */  \
1966     if (GMP_NAIL_BITS != 0)                     \
1967       {                                         \
1968         mp_size_t  __i;                         \
1969         for (__i = 0; __i < (size); __i++)      \
1970           ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
1971       }                                         \
1972   } while (0)
1973 #if WANT_ASSERT
1974 #define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
1975 #define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
1976 #else
1977 #define ASSERT_LIMB(limb)       do {} while (0)
1978 #define ASSERT_MPN(ptr, size)   do {} while (0)
1979 #endif
1980
1981
1982 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
1983    size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
1984 #if WANT_ASSERT
1985 #define ASSERT_MPN_ZERO_P(ptr,size)     \
1986   do {                                  \
1987     mp_size_t  __i;                     \
1988     ASSERT ((size) >= 0);               \
1989     for (__i = 0; __i < (size); __i++)  \
1990       ASSERT ((ptr)[__i] == 0);         \
1991   } while (0)
1992 #define ASSERT_MPN_NONZERO_P(ptr,size)  \
1993   do {                                  \
1994     mp_size_t  __i;                     \
1995     int        __nonzero = 0;           \
1996     ASSERT ((size) >= 0);               \
1997     for (__i = 0; __i < (size); __i++)  \
1998       if ((ptr)[__i] != 0)              \
1999         {                               \
2000           __nonzero = 1;                \
2001           break;                        \
2002         }                               \
2003     ASSERT (__nonzero);                 \
2004   } while (0)
2005 #else
2006 #define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
2007 #define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
2008 #endif
2009
2010
2011 #if HAVE_NATIVE_mpn_com_n
2012 #define mpn_com_n __MPN(com_n)
2013 void    mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
2014 #else
2015 #define mpn_com_n(d,s,n)                                \
2016   do {                                                  \
2017     mp_ptr     __d = (d);                               \
2018     mp_srcptr  __s = (s);                               \
2019     mp_size_t  __n = (n);                               \
2020     ASSERT (__n >= 1);                                  \
2021     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
2022     do                                                  \
2023       *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
2024     while (--__n);                                      \
2025   } while (0)
2026 #endif
2027
2028 #define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
2029   do {                                                  \
2030     mp_ptr       __d = (d);                             \
2031     mp_srcptr    __s1 = (s1);                           \
2032     mp_srcptr    __s2 = (s2);                           \
2033     mp_size_t    __n = (n);                             \
2034     ASSERT (__n >= 1);                                  \
2035     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
2036     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
2037     do                                                  \
2038       operation;                                        \
2039     while (--__n);                                      \
2040   } while (0)
2041
2042 #if HAVE_NATIVE_mpn_and_n
2043 #define mpn_and_n __MPN(and_n)
2044 void    mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2045 #else
2046 #define mpn_and_n(d, s1, s2, n) \
2047   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
2048 #endif
2049
2050 #if HAVE_NATIVE_mpn_andn_n
2051 #define mpn_andn_n __MPN(andn_n)
2052 void    mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2053 #else
2054 #define mpn_andn_n(d, s1, s2, n) \
2055   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
2056 #endif
2057
2058 #if HAVE_NATIVE_mpn_nand_n
2059 #define mpn_nand_n __MPN(nand_n)
2060 void    mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2061 #else
2062 #define mpn_nand_n(d, s1, s2, n) \
2063   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
2064 #endif
2065
2066 #if HAVE_NATIVE_mpn_ior_n
2067 #define mpn_ior_n __MPN(ior_n)
2068 void    mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2069 #else
2070 #define mpn_ior_n(d, s1, s2, n) \
2071   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
2072 #endif
2073
2074 #if HAVE_NATIVE_mpn_iorn_n
2075 #define mpn_iorn_n __MPN(iorn_n)
2076 void    mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2077 #else
2078 #define mpn_iorn_n(d, s1, s2, n) \
2079   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
2080 #endif
2081
2082 #if HAVE_NATIVE_mpn_nior_n
2083 #define mpn_nior_n __MPN(nior_n)
2084 void    mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2085 #else
2086 #define mpn_nior_n(d, s1, s2, n) \
2087   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
2088 #endif
2089
2090 #if HAVE_NATIVE_mpn_xor_n
2091 #define mpn_xor_n __MPN(xor_n)
2092 void    mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2093 #else
2094 #define mpn_xor_n(d, s1, s2, n) \
2095   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
2096 #endif
2097
2098 #if HAVE_NATIVE_mpn_xnor_n
2099 #define mpn_xnor_n __MPN(xnor_n)
2100 void    mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2101 #else
2102 #define mpn_xnor_n(d, s1, s2, n) \
2103   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
2104 #endif
2105
2106
2107 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2108 #if GMP_NAIL_BITS == 0
2109 #define ADDC_LIMB(cout, w, x, y)        \
2110   do {                                  \
2111     mp_limb_t  __x = (x);               \
2112     mp_limb_t  __y = (y);               \
2113     mp_limb_t  __w = __x + __y;         \
2114     (w) = __w;                          \
2115     (cout) = __w < __x;                 \
2116   } while (0)
2117 #else
2118 #define ADDC_LIMB(cout, w, x, y)        \
2119   do {                                  \
2120     mp_limb_t  __w;                     \
2121     ASSERT_LIMB (x);                    \
2122     ASSERT_LIMB (y);                    \
2123     __w = (x) + (y);                    \
2124     (w) = __w & GMP_NUMB_MASK;          \
2125     (cout) = __w >> GMP_NUMB_BITS;      \
2126   } while (0)
2127 #endif
2128
2129 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2130    subtract.  */
2131 #if GMP_NAIL_BITS == 0
2132 #define SUBC_LIMB(cout, w, x, y)        \
2133   do {                                  \
2134     mp_limb_t  __x = (x);               \
2135     mp_limb_t  __y = (y);               \
2136     mp_limb_t  __w = __x - __y;         \
2137     (w) = __w;                          \
2138     (cout) = __w > __x;                 \
2139   } while (0)
2140 #else
2141 #define SUBC_LIMB(cout, w, x, y)        \
2142   do {                                  \
2143     mp_limb_t  __w = (x) - (y);         \
2144     (w) = __w & GMP_NUMB_MASK;          \
2145     (cout) = __w >> (GMP_LIMB_BITS-1);  \
2146   } while (0)
2147 #endif
2148
2149
2150 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2151    expecting no carry (or borrow) from that.
2152
2153    The size parameter is only for the benefit of assertion checking.  In a
2154    normal build it's unused and the carry/borrow is just propagated as far
2155    as it needs to go.
2156
2157    On random data, usually only one or two limbs of {ptr,size} get updated,
2158    so there's no need for any sophisticated looping, just something compact
2159    and sensible.
2160
2161    FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2162    declaring their operand sizes, then remove the former.  This is purely
2163    for the benefit of assertion checking.  */
2164
2165 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0      \
2166   && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
2167 /* Better flags handling than the generic C gives on i386, saving a few
2168    bytes of code and maybe a cycle or two.  */
2169
2170 #define MPN_IORD_U(ptr, incr, aors)                                     \
2171   do {                                                                  \
2172     mp_ptr  __ptr_dummy;                                                \
2173     if (__builtin_constant_p (incr) && (incr) == 1)                     \
2174       {                                                                 \
2175         __asm__ __volatile__                                            \
2176           ("\n" ASM_L(top) ":\n"                                        \
2177            "\t" aors " $1, (%0)\n"                                      \
2178            "\tleal 4(%0),%0\n"                                          \
2179            "\tjc " ASM_L(top)                                           \
2180            : "=r" (__ptr_dummy)                                         \
2181            : "0"  (ptr)                                                 \
2182            : "memory");                                                 \
2183       }                                                                 \
2184     else                                                                \
2185       {                                                                 \
2186         __asm__ __volatile__                                            \
2187           (   aors  " %2,(%0)\n"                                        \
2188            "\tjnc " ASM_L(done) "\n"                                    \
2189            ASM_L(top) ":\n"                                             \
2190            "\t" aors " $1,4(%0)\n"                                      \
2191            "\tleal 4(%0),%0\n"                                          \
2192            "\tjc " ASM_L(top) "\n"                                      \
2193            ASM_L(done) ":\n"                                            \
2194            : "=r" (__ptr_dummy)                                         \
2195            : "0"  (ptr),                                                \
2196              "ri" (incr)                                                \
2197            : "memory");                                                 \
2198       }                                                                 \
2199   } while (0)
2200
2201 #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
2202 #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
2203 #define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
2204 #define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
2205 #endif
2206
2207 #if GMP_NAIL_BITS == 0
2208 #ifndef mpn_incr_u
2209 #define mpn_incr_u(p,incr)                              \
2210   do {                                                  \
2211     mp_limb_t __x;                                      \
2212     mp_ptr __p = (p);                                   \
2213     if (__builtin_constant_p (incr) && (incr) == 1)     \
2214       {                                                 \
2215         while (++(*(__p++)) == 0)                       \
2216           ;                                             \
2217       }                                                 \
2218     else                                                \
2219       {                                                 \
2220         __x = *__p + (incr);                            \
2221         *__p = __x;                                     \
2222         if (__x < (incr))                               \
2223           while (++(*(++__p)) == 0)                     \
2224             ;                                           \
2225       }                                                 \
2226   } while (0)
2227 #endif
2228 #ifndef mpn_decr_u
2229 #define mpn_decr_u(p,incr)                              \
2230   do {                                                  \
2231     mp_limb_t __x;                                      \
2232     mp_ptr __p = (p);                                   \
2233     if (__builtin_constant_p (incr) && (incr) == 1)     \
2234       {                                                 \
2235         while ((*(__p++))-- == 0)                       \
2236           ;                                             \
2237       }                                                 \
2238     else                                                \
2239       {                                                 \
2240         __x = *__p;                                     \
2241         *__p = __x - (incr);                            \
2242         if (__x < (incr))                               \
2243           while ((*(++__p))-- == 0)                     \
2244             ;                                           \
2245       }                                                 \
2246   } while (0)
2247 #endif
2248 #endif
2249
2250 #if GMP_NAIL_BITS >= 1
2251 #ifndef mpn_incr_u
2252 #define mpn_incr_u(p,incr)                              \
2253   do {                                                  \
2254     mp_limb_t __x;                                      \
2255     mp_ptr __p = (p);                                   \
2256     if (__builtin_constant_p (incr) && (incr) == 1)     \
2257       {                                                 \
2258         do                                              \
2259           {                                             \
2260             __x = (*__p + 1) & GMP_NUMB_MASK;           \
2261             *__p++ = __x;                               \
2262           }                                             \
2263         while (__x == 0);                               \
2264       }                                                 \
2265     else                                                \
2266       {                                                 \
2267         __x = (*__p + (incr));                          \
2268         *__p++ = __x & GMP_NUMB_MASK;                   \
2269         if (__x >> GMP_NUMB_BITS != 0)                  \
2270           {                                             \
2271             do                                          \
2272               {                                         \
2273                 __x = (*__p + 1) & GMP_NUMB_MASK;       \
2274                 *__p++ = __x;                           \
2275               }                                         \
2276             while (__x == 0);                           \
2277           }                                             \
2278       }                                                 \
2279   } while (0)
2280 #endif
2281 #ifndef mpn_decr_u
2282 #define mpn_decr_u(p,incr)                              \
2283   do {                                                  \
2284     mp_limb_t __x;                                      \
2285     mp_ptr __p = (p);                                   \
2286     if (__builtin_constant_p (incr) && (incr) == 1)     \
2287       {                                                 \
2288         do                                              \
2289           {                                             \
2290             __x = *__p;                                 \
2291             *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
2292           }                                             \
2293         while (__x == 0);                               \
2294       }                                                 \
2295     else                                                \
2296       {                                                 \
2297         __x = *__p - (incr);                            \
2298         *__p++ = __x & GMP_NUMB_MASK;                   \
2299         if (__x >> GMP_NUMB_BITS != 0)                  \
2300           {                                             \
2301             do                                          \
2302               {                                         \
2303                 __x = *__p;                             \
2304                 *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
2305               }                                         \
2306             while (__x == 0);                           \
2307           }                                             \
2308       }                                                 \
2309   } while (0)
2310 #endif
2311 #endif
2312
2313 #ifndef MPN_INCR_U
2314 #if WANT_ASSERT
2315 #define MPN_INCR_U(ptr, size, n)                        \
2316   do {                                                  \
2317     ASSERT ((size) >= 1);                               \
2318     ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
2319   } while (0)
2320 #else
2321 #define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
2322 #endif
2323 #endif
2324
2325 #ifndef MPN_DECR_U
2326 #if WANT_ASSERT
2327 #define MPN_DECR_U(ptr, size, n)                        \
2328   do {                                                  \
2329     ASSERT ((size) >= 1);                               \
2330     ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
2331   } while (0)
2332 #else
2333 #define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
2334 #endif
2335 #endif
2336
2337
2338 /* Structure for conversion between internal binary format and
2339    strings in base 2..36.  */
2340 struct bases
2341 {
2342   /* Number of digits in the conversion base that always fits in an mp_limb_t.
2343      For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2344      is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
2345   int chars_per_limb;
2346
2347   /* log(2)/log(conversion_base) */
2348   double chars_per_bit_exactly;
2349
2350   /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2351      factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
2352      i.e. the number of bits used to represent each digit in the base.  */
2353   mp_limb_t big_base;
2354
2355   /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
2356      fixed-point number.  Instead of dividing by big_base an application can
2357      choose to multiply by big_base_inverted.  */
2358   mp_limb_t big_base_inverted;
2359 };
2360
2361 #define   mp_bases __MPN(bases)
2362 #define __mp_bases __MPN(bases)
2363 __GMP_DECLSPEC extern const struct bases mp_bases[257];
2364
2365
2366 /* For power of 2 bases this is exact.  For other bases the result is either
2367    exact or one too big.
2368
2369    To be exact always it'd be necessary to examine all the limbs of the
2370    operand, since numbers like 100..000 and 99...999 generally differ only
2371    in the lowest limb.  It'd be possible to examine just a couple of high
2372    limbs to increase the probability of being exact, but that doesn't seem
2373    worth bothering with.  */
2374
2375 #define MPN_SIZEINBASE(result, ptr, size, base)                         \
2376   do {                                                                  \
2377     int       __lb_base, __cnt;                                         \
2378     size_t __totbits;                                                   \
2379                                                                         \
2380     ASSERT ((size) >= 0);                                               \
2381     ASSERT ((base) >= 2);                                               \
2382     ASSERT ((base) < numberof (mp_bases));                              \
2383                                                                         \
2384     /* Special case for X == 0.  */                                     \
2385     if ((size) == 0)                                                    \
2386       (result) = 1;                                                     \
2387     else                                                                \
2388       {                                                                 \
2389         /* Calculate the total number of significant bits of X.  */     \
2390         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2391         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2392                                                                         \
2393         if (POW2_P (base))                                              \
2394           {                                                             \
2395             __lb_base = mp_bases[base].big_base;                        \
2396             (result) = (__totbits + __lb_base - 1) / __lb_base;         \
2397           }                                                             \
2398         else                                                            \
2399           (result) = (size_t)                                           \
2400             (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
2401       }                                                                 \
2402   } while (0)
2403
2404 /* eliminate mp_bases lookups for base==16 */
2405 #define MPN_SIZEINBASE_16(result, ptr, size)                            \
2406   do {                                                                  \
2407     int       __cnt;                                                    \
2408     mp_size_t __totbits;                                                \
2409                                                                         \
2410     ASSERT ((size) >= 0);                                               \
2411                                                                         \
2412     /* Special case for X == 0.  */                                     \
2413     if ((size) == 0)                                                    \
2414       (result) = 1;                                                     \
2415     else                                                                \
2416       {                                                                 \
2417         /* Calculate the total number of significant bits of X.  */     \
2418         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2419         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2420         (result) = (__totbits + 4 - 1) / 4;                             \
2421       }                                                                 \
2422   } while (0)
2423
2424 /* bit count to limb count, rounding up */
2425 #define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2426
2427 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
2428    mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
2429    in both cases (LIMBS_PER_ULONG is also defined here.) */
2430 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2431
2432 #define LIMBS_PER_ULONG 1
2433 #define MPN_SET_UI(zp, zn, u)   \
2434   (zp)[0] = (u);                \
2435   (zn) = ((zp)[0] != 0);
2436 #define MPZ_FAKE_UI(z, zp, u)   \
2437   (zp)[0] = (u);                \
2438   PTR (z) = (zp);               \
2439   SIZ (z) = ((zp)[0] != 0);     \
2440   ASSERT_CODE (ALLOC (z) = 1);
2441
2442 #else /* need two limbs per ulong */
2443
2444 #define LIMBS_PER_ULONG 2
2445 #define MPN_SET_UI(zp, zn, u)                          \
2446   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2447   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2448   (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2449 #define MPZ_FAKE_UI(z, zp, u)                          \
2450   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2451   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2452   SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2453   PTR (z) = (zp);                                      \
2454   ASSERT_CODE (ALLOC (z) = 2);
2455
2456 #endif
2457
2458
2459 #if HAVE_HOST_CPU_FAMILY_x86
2460 #define TARGET_REGISTER_STARVED 1
2461 #else
2462 #define TARGET_REGISTER_STARVED 0
2463 #endif
2464
2465
2466 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2467    or 0 there into a limb 0xFF..FF or 0 respectively.
2468
2469    On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2470    but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2471    a little compile-time test and a fallback to a "? :" form.  The latter is
2472    necessary for instance on Cray vector systems.
2473
2474    Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2475    to an arithmetic right shift anyway, but it's good to get the desired
2476    shift on past versions too (in particular since an important use of
2477    LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
2478
2479 #define LIMB_HIGHBIT_TO_MASK(n)                                 \
2480   (((mp_limb_signed_t) -1 >> 1) < 0                             \
2481    ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
2482    : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
2483
2484
2485 /* Use a library function for invert_limb, if available. */
2486 #define   mpn_invert_limb __MPN(invert_limb)
2487 mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2488 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2489 #define invert_limb(invxl,xl)           \
2490   do {                                  \
2491     (invxl) = mpn_invert_limb (xl);     \
2492   } while (0)
2493 #endif
2494
2495 #ifndef invert_limb
2496 #define invert_limb(invxl,xl)                   \
2497   do {                                          \
2498     mp_limb_t dummy;                            \
2499     ASSERT ((xl) != 0);                         \
2500     udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \
2501   } while (0)
2502 #endif
2503
2504 #ifndef udiv_qrnnd_preinv
2505 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3
2506 #endif
2507
2508 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2509    limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
2510    If this would yield overflow, DI should be the largest possible number
2511    (i.e., only ones).  For correct operation, the most significant bit of D
2512    has to be set.  Put the quotient in Q and the remainder in R.  */
2513 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                         \
2514   do {                                                                  \
2515     mp_limb_t _q, _ql, _r;                                              \
2516     mp_limb_t _xh, _xl;                                                 \
2517     ASSERT ((d) != 0);                                                  \
2518     umul_ppmm (_q, _ql, (nh), (di));                                    \
2519     _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
2520     umul_ppmm (_xh, _xl, _q, (d));                                      \
2521     sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
2522     if (_xh != 0)                                                       \
2523       {                                                                 \
2524         sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
2525         _q += 1;                                                        \
2526         if (_xh != 0)                                                   \
2527           {                                                             \
2528             _r -= (d);                                                  \
2529             _q += 1;                                                    \
2530           }                                                             \
2531       }                                                                 \
2532     if (_r >= (d))                                                      \
2533       {                                                                 \
2534         _r -= (d);                                                      \
2535         _q += 1;                                                        \
2536       }                                                                 \
2537     (r) = _r;                                                           \
2538     (q) = _q;                                                           \
2539   } while (0)
2540
2541 /* Like udiv_qrnnd_preinv, but branch-free. */
2542 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                         \
2543   do {                                                                  \
2544     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2545     mp_limb_t _xh, _xl;                                                 \
2546     _n2 = (nh);                                                         \
2547     _n10 = (nl);                                                        \
2548     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2549     _nadj = _n10 + (_nmask & (d));                                      \
2550     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2551     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2552     _q1 = ~_xh;                                                         \
2553     umul_ppmm (_xh, _xl, _q1, d);                                       \
2554     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2555     _xh -= (d);                                 /* xh = 0 or -1 */      \
2556     (r) = _xl + ((d) & _xh);                                            \
2557     (q) = _xh - _q1;                                                    \
2558   } while (0)
2559
2560 /* Like udiv_qrnnd_preinv2, but for for any value D.  DNORM is D shifted left
2561    so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
2562 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2563   do {                                                                  \
2564     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2565     mp_limb_t _xh, _xl;                                                 \
2566     _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
2567     _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
2568     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2569     _nadj = _n10 + (_nmask & (dnorm));                                  \
2570     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2571     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2572     _q1 = ~_xh;                                                         \
2573     umul_ppmm (_xh, _xl, _q1, d);                                       \
2574     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2575     _xh -= (d);                                                         \
2576     (r) = _xl + ((d) & _xh);                                            \
2577     (q) = _xh - _q1;                                                    \
2578   } while (0)
2579
2580 /* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
2581
2582    We write things strangely below, to help gcc.  A more straightforward
2583    version:
2584
2585    _r = (nl) - _qh * (d);
2586    _t = _r + (d);
2587    if (_r >= _ql)
2588      {
2589        _qh--;
2590        _r = _t;
2591      }
2592
2593    For one operation shorter critical path, one may want to use this form:
2594
2595    _p = _qh * (d)
2596    _s = (nl) + (d);
2597    _r = (nl) - _p;
2598    _t = _s - _p;
2599    if (_r >= _ql)
2600      {
2601        _qh--;
2602        _r = _t;
2603      }
2604 */
2605 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)                         \
2606   do {                                                                  \
2607     mp_limb_t _qh, _ql, _r;                                             \
2608     umul_ppmm (_qh, _ql, (nh), (di));                                   \
2609     if (__builtin_constant_p (nl) && (nl) == 0)                         \
2610       _qh += (nh) + 1;                                                  \
2611     else                                                                \
2612       add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                  \
2613     _r = (nl) - _qh * (d);                                              \
2614     if (_r > _ql)       /* both > and >= should be OK */                \
2615       {                                                                 \
2616         _r += (d);                                                      \
2617         _qh--;                                                          \
2618       }                                                                 \
2619     if (UNLIKELY (_r >= (d)))                                           \
2620       {                                                                 \
2621         _r -= (d);                                                      \
2622         _qh++;                                                          \
2623       }                                                                 \
2624     (r) = _r;                                                           \
2625     (q) = _qh;                                                          \
2626   } while (0)
2627
2628 /* Compute r = nh*B mod d, where di is the inverse of d.  */
2629 #define udiv_rnd_preinv(r, nh, d, di)                                   \
2630   do {                                                                  \
2631     mp_limb_t _qh, _ql, _r;                                             \
2632     umul_ppmm (_qh, _ql, (nh), (di));                                   \
2633     _qh += (nh) + 1;                                                    \
2634     _r = - _qh * (d);                                                   \
2635     if (_r > _ql)                                                       \
2636       _r += (d);                                                        \
2637     (r) = _r;                                                           \
2638   } while (0)
2639
2640 #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
2641 #define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
2642 mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2643 #endif
2644
2645
2646 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
2647    the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
2648    mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
2649    the few CISC chips where preinv is not good have defines saying so.  */
2650 #ifndef USE_PREINV_DIVREM_1
2651 #define USE_PREINV_DIVREM_1   1
2652 #endif
2653 #ifndef USE_PREINV_MOD_1
2654 #define USE_PREINV_MOD_1   1
2655 #endif
2656
2657 #if USE_PREINV_DIVREM_1
2658 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2659   mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2660 #else
2661 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2662   mpn_divrem_1 (qp, xsize, ap, size, d)
2663 #endif
2664
2665 #if USE_PREINV_MOD_1
2666 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2667   mpn_preinv_mod_1 (src, size, divisor, inverse)
2668 #else
2669 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2670   mpn_mod_1 (src, size, divisor)
2671 #endif
2672
2673
2674 #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
2675 #define   mpn_mod_34lsub1 __MPN(mod_34lsub1)
2676 mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2677 #endif
2678
2679
2680 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2681    plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
2682    mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
2683    modexact are faster at all sizes, so the defaults are 0.  Those CPUs
2684    where this is not right have a tuned threshold.  */
2685 #ifndef DIVEXACT_1_THRESHOLD
2686 #define DIVEXACT_1_THRESHOLD  0
2687 #endif
2688 #ifndef MODEXACT_1_ODD_THRESHOLD
2689 #define MODEXACT_1_ODD_THRESHOLD  0
2690 #endif
2691
2692 #ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
2693 #define mpn_divexact_1 __MPN(divexact_1)
2694 void    mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2695 #endif
2696
2697 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
2698   do {                                                                        \
2699     if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
2700       ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2701     else                                                                      \
2702       {                                                                       \
2703         ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
2704         mpn_divexact_1 (dst, src, size, divisor);                             \
2705       }                                                                       \
2706   } while (0)
2707
2708 #ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
2709 #define   mpn_modexact_1c_odd __MPN(modexact_1c_odd)
2710 mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2711 #endif
2712
2713 #if HAVE_NATIVE_mpn_modexact_1_odd
2714 #define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
2715 mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2716 #else
2717 #define mpn_modexact_1_odd(src,size,divisor) \
2718   mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2719 #endif
2720
2721 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)                     \
2722   (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)                     \
2723    ? mpn_modexact_1_odd (src, size, divisor)                            \
2724    : mpn_mod_1 (src, size, divisor))
2725
2726
2727 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
2728    2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2729    n must be odd (otherwise such an inverse doesn't exist).
2730
2731    This is not to be confused with invert_limb(), which is completely
2732    different.
2733
2734    The table lookup gives an inverse with the low 8 bits valid, and each
2735    multiply step doubles the number of bits.  See Jebelean "An algorithm for
2736    exact division" end of section 4 (reference in gmp.texi).
2737
2738    Possible enhancement: Could use UHWtype until the last step, if half-size
2739    multiplies are faster (might help under _LONG_LONG_LIMB).
2740
2741    Alternative: As noted in Granlund and Montgomery "Division by Invariant
2742    Integers using Multiplication" (reference in gmp.texi), n itself gives a
2743    3-bit inverse immediately, and could be used instead of a table lookup.
2744    A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2745    bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
2746
2747 #define binvert_limb_table  __gmp_binvert_limb_table
2748 __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
2749
2750 #define binvert_limb(inv,n)                                             \
2751   do {                                                                  \
2752     mp_limb_t  __n = (n);                                               \
2753     mp_limb_t  __inv;                                                   \
2754     ASSERT ((__n & 1) == 1);                                            \
2755                                                                         \
2756     __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */                \
2757     if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;   \
2758     if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;   \
2759     if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;   \
2760                                                                         \
2761     if (GMP_NUMB_BITS > 64)                                             \
2762       {                                                                 \
2763         int  __invbits = 64;                                            \
2764         do {                                                            \
2765           __inv = 2 * __inv - __inv * __inv * __n;                      \
2766           __invbits *= 2;                                               \
2767         } while (__invbits < GMP_NUMB_BITS);                            \
2768       }                                                                 \
2769                                                                         \
2770     ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                        \
2771     (inv) = __inv & GMP_NUMB_MASK;                                      \
2772   } while (0)
2773 #define modlimb_invert binvert_limb  /* backward compatibility */
2774
2775 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2776    Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2777    GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2778    we need to start from GMP_NUMB_MAX>>1. */
2779 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2780
2781 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2782    These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2783 #define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
2784 #define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2785
2786
2787 /* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
2788
2789    It's not clear whether this is the best way to do this calculation.
2790    Anything congruent to -a would be fine for the one limb congruence
2791    tests.  */
2792
2793 #define NEG_MOD(r, a, d)                                                \
2794   do {                                                                  \
2795     ASSERT ((d) != 0);                                                  \
2796     ASSERT_LIMB (a);                                                    \
2797     ASSERT_LIMB (d);                                                    \
2798                                                                         \
2799     if ((a) <= (d))                                                     \
2800       {                                                                 \
2801         /* small a is reasonably likely */                              \
2802         (r) = (d) - (a);                                                \
2803       }                                                                 \
2804     else                                                                \
2805       {                                                                 \
2806         unsigned   __twos;                                              \
2807         mp_limb_t  __dnorm;                                             \
2808         count_leading_zeros (__twos, d);                                \
2809         __twos -= GMP_NAIL_BITS;                                        \
2810         __dnorm = (d) << __twos;                                        \
2811         (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);             \
2812       }                                                                 \
2813                                                                         \
2814     ASSERT_LIMB (r);                                                    \
2815   } while (0)
2816
2817 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2818 #define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
2819
2820
2821 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2822    to 0 if there's an even number.  "n" should be an unsigned long and "p"
2823    an int.  */
2824
2825 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2826 #define ULONG_PARITY(p, n)                                              \
2827   do {                                                                  \
2828     int __p;                                                            \
2829     __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));                    \
2830     (p) = __p & 1;                                                      \
2831   } while (0)
2832 #endif
2833
2834 /* Cray intrinsic _popcnt. */
2835 #ifdef _CRAY
2836 #define ULONG_PARITY(p, n)      \
2837   do {                          \
2838     (p) = _popcnt (n) & 1;      \
2839   } while (0)
2840 #endif
2841
2842 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2843     && ! defined (NO_ASM) && defined (__ia64)
2844 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
2845    to a 64 bit unsigned long long for popcnt */
2846 #define ULONG_PARITY(p, n)                                              \
2847   do {                                                                  \
2848     unsigned long long  __n = (unsigned long) (n);                      \
2849     int  __p;                                                           \
2850     __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));                \
2851     (p) = __p & 1;                                                      \
2852   } while (0)
2853 #endif
2854
2855 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2856     && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
2857 #if __GMP_GNUC_PREREQ (3,1)
2858 #define __GMP_qm "=Qm"
2859 #define __GMP_q "=Q"
2860 #else
2861 #define __GMP_qm "=qm"
2862 #define __GMP_q "=q"
2863 #endif
2864 #define ULONG_PARITY(p, n)                                              \
2865   do {                                                                  \
2866     char           __p;                                                 \
2867     unsigned long  __n = (n);                                           \
2868     __n ^= (__n >> 16);                                                 \
2869     __asm__ ("xorb %h1, %b1\n\t"                                        \
2870              "setpo %0"                                                 \
2871          : __GMP_qm (__p), __GMP_q (__n)                                \
2872          : "1" (__n));                                                  \
2873     (p) = __p;                                                          \
2874   } while (0)
2875 #endif
2876
2877 #if ! defined (ULONG_PARITY)
2878 #define ULONG_PARITY(p, n)                                              \
2879   do {                                                                  \
2880     unsigned long  __n = (n);                                           \
2881     int  __p = 0;                                                       \
2882     do                                                                  \
2883       {                                                                 \
2884         __p ^= 0x96696996L >> (__n & 0x1F);                             \
2885         __n >>= 5;                                                      \
2886       }                                                                 \
2887     while (__n != 0);                                                   \
2888                                                                         \
2889     (p) = __p & 1;                                                      \
2890   } while (0)
2891 #endif
2892
2893
2894 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
2895    version 3.1 at least) doesn't seem to know how to generate rlwimi for
2896    anything other than bit-fields, so use "asm".  */
2897 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2898   && HAVE_HOST_CPU_FAMILY_powerpc && BITS_PER_MP_LIMB == 32
2899 #define BSWAP_LIMB(dst, src)                                            \
2900   do {                                                                  \
2901     mp_limb_t  __bswapl_src = (src);                                    \
2902     mp_limb_t  __tmp1 = __bswapl_src >> 24;             /* low byte */  \
2903     mp_limb_t  __tmp2 = __bswapl_src << 24;             /* high byte */ \
2904     __asm__ ("rlwimi %0, %2, 24, 16, 23"                /* 2nd low */   \
2905          : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));           \
2906     __asm__ ("rlwimi %0, %2,  8,  8, 15"                /* 3nd high */  \
2907          : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));           \
2908     (dst) = __tmp1 | __tmp2;                            /* whole */     \
2909   } while (0)
2910 #endif
2911
2912 /* bswap is available on i486 and up and is fast.  A combination rorw $8 /
2913    roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
2914    kernel with xchgb instead of rorw), but this is not done here, because
2915    i386 means generic x86 and mixing word and dword operations will cause
2916    partial register stalls on P6 chips.  */
2917 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2918   && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
2919   && BITS_PER_MP_LIMB == 32
2920 #define BSWAP_LIMB(dst, src)                                            \
2921   do {                                                                  \
2922     __asm__ ("bswap %0" : "=r" (dst) : "0" (src));                      \
2923   } while (0)
2924 #endif
2925
2926 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2927   && defined (__amd64__) && BITS_PER_MP_LIMB == 64
2928 #define BSWAP_LIMB(dst, src)                                            \
2929   do {                                                                  \
2930     __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));                     \
2931   } while (0)
2932 #endif
2933
2934 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2935     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
2936 #define BSWAP_LIMB(dst, src)                                            \
2937   do {                                                                  \
2938     __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));           \
2939   } while (0)
2940 #endif
2941
2942 /* As per glibc. */
2943 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2944   && HAVE_HOST_CPU_FAMILY_m68k && BITS_PER_MP_LIMB == 32
2945 #define BSWAP_LIMB(dst, src)                                            \
2946   do {                                                                  \
2947     mp_limb_t  __bswapl_src = (src);                                    \
2948     __asm__ ("ror%.w %#8, %0\n\t"                                       \
2949              "swap   %0\n\t"                                            \
2950              "ror%.w %#8, %0"                                           \
2951              : "=d" (dst)                                               \
2952              : "0" (__bswapl_src));                                     \
2953   } while (0)
2954 #endif
2955
2956 #if ! defined (BSWAP_LIMB)
2957 #if BITS_PER_MP_LIMB == 8
2958 #define BSWAP_LIMB(dst, src)            \
2959   do { (dst) = (src); } while (0)
2960 #endif
2961 #if BITS_PER_MP_LIMB == 16
2962 #define BSWAP_LIMB(dst, src)                    \
2963   do {                                          \
2964     (dst) = ((src) << 8) + ((src) >> 8);        \
2965   } while (0)
2966 #endif
2967 #if BITS_PER_MP_LIMB == 32
2968 #define BSWAP_LIMB(dst, src)    \
2969   do {                          \
2970     (dst) =                     \
2971       ((src) << 24)             \
2972       + (((src) & 0xFF00) << 8) \
2973       + (((src) >> 8) & 0xFF00) \
2974       + ((src) >> 24);          \
2975   } while (0)
2976 #endif
2977 #if BITS_PER_MP_LIMB == 64
2978 #define BSWAP_LIMB(dst, src)            \
2979   do {                                  \
2980     (dst) =                             \
2981       ((src) << 56)                     \
2982       + (((src) & 0xFF00) << 40)        \
2983       + (((src) & 0xFF0000) << 24)      \
2984       + (((src) & 0xFF000000) << 8)     \
2985       + (((src) >> 8) & 0xFF000000)     \
2986       + (((src) >> 24) & 0xFF0000)      \
2987       + (((src) >> 40) & 0xFF00)        \
2988       + ((src) >> 56);                  \
2989   } while (0)
2990 #endif
2991 #endif
2992
2993 #if ! defined (BSWAP_LIMB)
2994 #define BSWAP_LIMB(dst, src)                            \
2995   do {                                                  \
2996     mp_limb_t  __bswapl_src = (src);                    \
2997     mp_limb_t  __dst = 0;                               \
2998     int        __i;                                     \
2999     for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
3000       {                                                 \
3001         __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
3002         __bswapl_src >>= 8;                             \
3003       }                                                 \
3004     (dst) = __dst;                                      \
3005   } while (0)
3006 #endif
3007
3008
3009 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3010    those we know are fast.  */
3011 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
3012   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
3013   && (HAVE_HOST_CPU_powerpc604                                          \
3014       || HAVE_HOST_CPU_powerpc604e                                      \
3015       || HAVE_HOST_CPU_powerpc750                                       \
3016       || HAVE_HOST_CPU_powerpc7400)
3017 #define BSWAP_LIMB_FETCH(limb, src)                                     \
3018   do {                                                                  \
3019     mp_srcptr  __blf_src = (src);                                       \
3020     mp_limb_t  __limb;                                                  \
3021     __asm__ ("lwbrx %0, 0, %1"                                          \
3022              : "=r" (__limb)                                            \
3023              : "r" (__blf_src),                                         \
3024                "m" (*__blf_src));                                       \
3025     (limb) = __limb;                                                    \
3026   } while (0)
3027 #endif
3028
3029 #if ! defined (BSWAP_LIMB_FETCH)
3030 #define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
3031 #endif
3032
3033
3034 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3035    know are fast.  FIXME: Is this necessary?  */
3036 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
3037   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
3038   && (HAVE_HOST_CPU_powerpc604                                          \
3039       || HAVE_HOST_CPU_powerpc604e                                      \
3040       || HAVE_HOST_CPU_powerpc750                                       \
3041       || HAVE_HOST_CPU_powerpc7400)
3042 #define BSWAP_LIMB_STORE(dst, limb)                                     \
3043   do {                                                                  \
3044     mp_ptr     __dst = (dst);                                           \
3045     mp_limb_t  __limb = (limb);                                         \
3046     __asm__ ("stwbrx %1, 0, %2"                                         \
3047              : "=m" (*__dst)                                            \
3048              : "r" (__limb),                                            \
3049                "r" (__dst));                                            \
3050   } while (0)
3051 #endif
3052
3053 #if ! defined (BSWAP_LIMB_STORE)
3054 #define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
3055 #endif
3056
3057
3058 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3059 #define MPN_BSWAP(dst, src, size)                       \
3060   do {                                                  \
3061     mp_ptr     __dst = (dst);                           \
3062     mp_srcptr  __src = (src);                           \
3063     mp_size_t  __size = (size);                         \
3064     mp_size_t  __i;                                     \
3065     ASSERT ((size) >= 0);                               \
3066     ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
3067     CRAY_Pragma ("_CRI ivdep");                         \
3068     for (__i = 0; __i < __size; __i++)                  \
3069       {                                                 \
3070         BSWAP_LIMB_FETCH (*__dst, __src);               \
3071         __dst++;                                        \
3072         __src++;                                        \
3073       }                                                 \
3074   } while (0)
3075
3076 /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
3077 #define MPN_BSWAP_REVERSE(dst, src, size)               \
3078   do {                                                  \
3079     mp_ptr     __dst = (dst);                           \
3080     mp_size_t  __size = (size);                         \
3081     mp_srcptr  __src = (src) + __size - 1;              \
3082     mp_size_t  __i;                                     \
3083     ASSERT ((size) >= 0);                               \
3084     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
3085     CRAY_Pragma ("_CRI ivdep");                         \
3086     for (__i = 0; __i < __size; __i++)                  \
3087       {                                                 \
3088         BSWAP_LIMB_FETCH (*__dst, __src);               \
3089         __dst++;                                        \
3090         __src--;                                        \
3091       }                                                 \
3092   } while (0)
3093
3094
3095 /* No processor claiming to be SPARC v9 compliant seems to
3096    implement the POPC instruction.  Disable pattern for now.  */
3097 #if 0
3098 #if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
3099 #define popc_limb(result, input)                                        \
3100   do {                                                                  \
3101     DItype __res;                                                       \
3102     __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));              \
3103   } while (0)
3104 #endif
3105 #endif
3106
3107 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3108 #define popc_limb(result, input)                                        \
3109   do {                                                                  \
3110     __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));             \
3111   } while (0)
3112 #endif
3113
3114 /* Cray intrinsic. */
3115 #ifdef _CRAY
3116 #define popc_limb(result, input)        \
3117   do {                                  \
3118     (result) = _popcnt (input);         \
3119   } while (0)
3120 #endif
3121
3122 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
3123     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3124 #define popc_limb(result, input)                                        \
3125   do {                                                                  \
3126     __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));           \
3127   } while (0)
3128 #endif
3129
3130 /* Cool population count of an mp_limb_t.
3131    You have to figure out how this works, We won't tell you!
3132
3133    The constants could also be expressed as:
3134      0x55... = [2^N / 3]     = [(2^N-1)/3]
3135      0x33... = [2^N / 5]     = [(2^N-1)/5]
3136      0x0f... = [2^N / 17]    = [(2^N-1)/17]
3137      (N is GMP_LIMB_BITS, [] denotes truncation.) */
3138
3139 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3140 #define popc_limb(result, input)                                        \
3141   do {                                                                  \
3142     mp_limb_t  __x = (input);                                           \
3143     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3144     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3145     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3146     (result) = __x & 0xff;                                              \
3147   } while (0)
3148 #endif
3149
3150 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3151 #define popc_limb(result, input)                                        \
3152   do {                                                                  \
3153     mp_limb_t  __x = (input);                                           \
3154     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3155     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3156     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3157     __x = ((__x >> 8) + __x);                                           \
3158     (result) = __x & 0xff;                                              \
3159   } while (0)
3160 #endif
3161
3162 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3163 #define popc_limb(result, input)                                        \
3164   do {                                                                  \
3165     mp_limb_t  __x = (input);                                           \
3166     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3167     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3168     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3169     __x = ((__x >> 8) + __x);                                           \
3170     __x = ((__x >> 16) + __x);                                          \
3171     (result) = __x & 0xff;                                              \
3172   } while (0)
3173 #endif
3174
3175 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3176 #define popc_limb(result, input)                                        \
3177   do {                                                                  \
3178     mp_limb_t  __x = (input);                                           \
3179     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3180     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3181     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3182     __x = ((__x >> 8) + __x);                                           \
3183     __x = ((__x >> 16) + __x);                                          \
3184     __x = ((__x >> 32) + __x);                                          \
3185     (result) = __x & 0xff;                                              \
3186   } while (0)
3187 #endif
3188
3189
3190 /* Define stuff for longlong.h.  */
3191 #if HAVE_ATTRIBUTE_MODE
3192 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
3193 typedef          int SItype     __attribute__ ((mode (SI)));
3194 typedef unsigned int USItype    __attribute__ ((mode (SI)));
3195 typedef          int DItype     __attribute__ ((mode (DI)));
3196 typedef unsigned int UDItype    __attribute__ ((mode (DI)));
3197 #else
3198 typedef unsigned char UQItype;
3199 typedef          long SItype;
3200 typedef unsigned long USItype;
3201 #if HAVE_LONG_LONG
3202 typedef long long int DItype;
3203 typedef unsigned long long int UDItype;
3204 #else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
3205 typedef long int DItype;
3206 typedef unsigned long int UDItype;
3207 #endif
3208 #endif
3209
3210 typedef mp_limb_t UWtype;
3211 typedef unsigned int UHWtype;
3212 #define W_TYPE_SIZE BITS_PER_MP_LIMB
3213
3214 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3215
3216    Bit field packing is "implementation defined" according to C99, which
3217    leaves us at the compiler's mercy here.  For some systems packing is
3218    defined in the ABI (eg. x86).  In any case so far it seems universal that
3219    little endian systems pack from low to high, and big endian from high to
3220    low within the given type.
3221
3222    Within the fields we rely on the integer endianness being the same as the
3223    float endianness, this is true everywhere we know of and it'd be a fairly
3224    strange system that did anything else.  */
3225
3226 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3227 #define _GMP_IEEE_FLOATS 1
3228 union ieee_double_extract
3229 {
3230   struct
3231     {
3232       gmp_uint_least32_t manh:20;
3233       gmp_uint_least32_t exp:11;
3234       gmp_uint_least32_t sig:1;
3235       gmp_uint_least32_t manl:32;
3236     } s;
3237   double d;
3238 };
3239 #endif
3240
3241 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3242 #define _GMP_IEEE_FLOATS 1
3243 union ieee_double_extract
3244 {
3245   struct
3246     {
3247       gmp_uint_least32_t manl:32;
3248       gmp_uint_least32_t manh:20;
3249       gmp_uint_least32_t exp:11;
3250       gmp_uint_least32_t sig:1;
3251     } s;
3252   double d;
3253 };
3254 #endif
3255
3256 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3257 #define _GMP_IEEE_FLOATS 1
3258 union ieee_double_extract
3259 {
3260   struct
3261     {
3262       gmp_uint_least32_t sig:1;
3263       gmp_uint_least32_t exp:11;
3264       gmp_uint_least32_t manh:20;
3265       gmp_uint_least32_t manl:32;
3266     } s;
3267   double d;
3268 };
3269 #endif
3270
3271
3272 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3273    that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
3274 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3275 /* Maximum number of limbs it will take to store any `double'.
3276    We assume doubles have 53 mantissa bits.  */
3277 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3278
3279 int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
3280
3281 #define mpn_get_d __gmpn_get_d
3282 double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
3283
3284
3285 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3286    a_inf if x is an infinity.  Both are considered unlikely values, for
3287    branch prediction.  */
3288
3289 #if _GMP_IEEE_FLOATS
3290 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
3291   do {                                          \
3292     union ieee_double_extract  u;               \
3293     u.d = (x);                                  \
3294     if (UNLIKELY (u.s.exp == 0x7FF))            \
3295       {                                         \
3296         if (u.s.manl == 0 && u.s.manh == 0)     \
3297           { a_inf; }                            \
3298         else                                    \
3299           { a_nan; }                            \
3300       }                                         \
3301   } while (0)
3302 #endif
3303
3304 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3305 /* no nans or infs in these formats */
3306 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
3307   do { } while (0)
3308 #endif
3309
3310 #ifndef DOUBLE_NAN_INF_ACTION
3311 /* Unknown format, try something generic.
3312    NaN should be "unordered", so x!=x.
3313    Inf should be bigger than DBL_MAX.  */
3314 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
3315   do {                                                          \
3316     {                                                           \
3317       if (UNLIKELY ((x) != (x)))                                \
3318         { a_nan; }                                              \
3319       else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))      \
3320         { a_inf; }                                              \
3321     }                                                           \
3322   } while (0)
3323 #endif
3324
3325 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3326    in the coprocessor, which means a bigger exponent range than normal, and
3327    depending on the rounding mode, a bigger mantissa than normal.  (See
3328    "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
3329    "d" through memory to force any rounding and overflows to occur.
3330
3331    On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3332    registers, where there's no such extra precision and no need for the
3333    FORCE_DOUBLE.  We don't bother to detect this since the present uses for
3334    FORCE_DOUBLE are only in test programs and default generic C code.
3335
3336    Not quite sure that an "automatic volatile" will use memory, but it does
3337    in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3338    apparently matching operands like "0" are only allowed on a register
3339    output.  gcc 3.4 warns about this, though in fact it and past versions
3340    seem to put the operand through memory as hoped.  */
3341
3342 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
3343      || defined (__amd64__))
3344 #define FORCE_DOUBLE(d) \
3345   do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3346 #else
3347 #define FORCE_DOUBLE(d)  do { } while (0)
3348 #endif
3349
3350
3351 extern int __gmp_junk;
3352 extern const int __gmp_0;
3353 void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
3354 void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3355 void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3356 void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3357 #define GMP_ERROR(code)   __gmp_exception (code)
3358 #define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
3359 #define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
3360
3361 #if defined _LONG_LONG_LIMB
3362 #if __GMP_HAVE_TOKEN_PASTE
3363 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3364 #else
3365 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
3366 #endif
3367 #else /* not _LONG_LONG_LIMB */
3368 #if __GMP_HAVE_TOKEN_PASTE
3369 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3370 #else
3371 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
3372 #endif
3373 #endif /* _LONG_LONG_LIMB */
3374
3375 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3376 #if GMP_NUMB_BITS == 2
3377 #define PP 0x3                                  /* 3 */
3378 #define PP_FIRST_OMITTED 5
3379 #endif
3380 #if GMP_NUMB_BITS == 4
3381 #define PP 0xF                                  /* 3 x 5 */
3382 #define PP_FIRST_OMITTED 7
3383 #endif
3384 #if GMP_NUMB_BITS == 8
3385 #define PP 0x69                                 /* 3 x 5 x 7 */
3386 #define PP_FIRST_OMITTED 11
3387 #endif
3388 #if GMP_NUMB_BITS == 16
3389 #define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
3390 #define PP_FIRST_OMITTED 17
3391 #endif
3392 #if GMP_NUMB_BITS == 32
3393 #define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
3394 #define PP_INVERTED 0x53E5645CL
3395 #define PP_FIRST_OMITTED 31
3396 #endif
3397 #if GMP_NUMB_BITS == 64
3398 #define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
3399 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3400 #define PP_FIRST_OMITTED 59
3401 #endif
3402 #ifndef PP_FIRST_OMITTED
3403 #define PP_FIRST_OMITTED 3
3404 #endif
3405
3406
3407
3408 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3409    zero bit representing +1 and a one bit representing -1.  Bits other than
3410    bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
3411    used to ensure the expressions are "int"s even if a and/or b might be
3412    other types.
3413
3414    JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3415    and their speed is important.  Expressions are used rather than
3416    conditionals to accumulate sign changes, which effectively means XORs
3417    instead of conditional JUMPs. */
3418
3419 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3420 #define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
3421
3422 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3423 #define JACOBI_U0(a)   ((a) == 1)
3424
3425 /* (a/0), with a given by low and size;
3426    is 1 if a=+/-1, 0 otherwise */
3427 #define JACOBI_LS0(alow,asize) \
3428   (((asize) == 1 || (asize) == -1) && (alow) == 1)
3429
3430 /* (a/0), with a an mpz_t;
3431    fetch of low limb always valid, even if size is zero */
3432 #define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
3433
3434 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3435 #define JACOBI_0U(b)   ((b) == 1)
3436
3437 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3438 #define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
3439
3440 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3441 #define JACOBI_0LS(blow,bsize) \
3442   (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3443
3444 /* Convert a bit1 to +1 or -1. */
3445 #define JACOBI_BIT1_TO_PN(result_bit1) \
3446   (1 - ((int) (result_bit1) & 2))
3447
3448 /* (2/b), with b unsigned and odd;
3449    is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3450    hence obtained from (b>>1)^b */
3451 #define JACOBI_TWO_U_BIT1(b) \
3452   ((int) (((b) >> 1) ^ (b)))
3453
3454 /* (2/b)^twos, with b unsigned and odd */
3455 #define JACOBI_TWOS_U_BIT1(twos, b) \
3456   ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3457
3458 /* (2/b)^twos, with b unsigned and odd */
3459 #define JACOBI_TWOS_U(twos, b) \
3460   (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3461
3462 /* (-1/b), with b odd (signed or unsigned);
3463    is (-1)^((b-1)/2) */
3464 #define JACOBI_N1B_BIT1(b) \
3465   ((int) (b))
3466
3467 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3468    is (-1/b) if a<0, or +1 if a>=0 */
3469 #define JACOBI_ASGN_SU_BIT1(a, b) \
3470   ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3471
3472 /* (a/b) effect due to sign of b: signed/signed;
3473    is -1 if a and b both negative, +1 otherwise */
3474 #define JACOBI_BSGN_SS_BIT1(a, b) \
3475   ((((a)<0) & ((b)<0)) << 1)
3476
3477 /* (a/b) effect due to sign of b: signed/mpz;
3478    is -1 if a and b both negative, +1 otherwise */
3479 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3480   JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3481
3482 /* (a/b) effect due to sign of b: mpz/signed;
3483    is -1 if a and b both negative, +1 otherwise */
3484 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3485   JACOBI_BSGN_SZ_BIT1 (b, a)
3486
3487 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3488    is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3489    both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
3490    because this is used in a couple of places with only bit 1 of a or b
3491    valid. */
3492 #define JACOBI_RECIP_UU_BIT1(a, b) \
3493   ((int) ((a) & (b)))
3494
3495 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3496    decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
3497    updated for the new b_ptr.  result_bit1 is updated according to the
3498    factors of 2 stripped, as per (a/2).  */
3499 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
3500   do {                                                                  \
3501     ASSERT ((b_size) >= 1);                                             \
3502     ASSERT ((b_low) == (b_ptr)[0]);                                     \
3503                                                                         \
3504     while (UNLIKELY ((b_low) == 0))                                     \
3505       {                                                                 \
3506         (b_size)--;                                                     \
3507         ASSERT ((b_size) >= 1);                                         \
3508         (b_ptr)++;                                                      \
3509         (b_low) = *(b_ptr);                                             \
3510                                                                         \
3511         ASSERT (((a) & 1) != 0);                                        \
3512         if ((GMP_NUMB_BITS % 2) == 1)                                   \
3513           (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
3514       }                                                                 \
3515   } while (0)
3516
3517 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3518    modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
3519    unsigned.  modexact_1_odd effectively calculates -a mod b, and
3520    result_bit1 is adjusted for the factor of -1.
3521
3522    The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3523    sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3524    factor to introduce into result_bit1, so for that case use mpn_mod_1
3525    unconditionally.
3526
3527    FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3528    for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
3529    or not skip a divide step, or something. */
3530
3531 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3532   do {                                                                     \
3533     mp_srcptr  __a_ptr  = (a_ptr);                                         \
3534     mp_size_t  __a_size = (a_size);                                        \
3535     mp_limb_t  __b      = (b);                                             \
3536                                                                            \
3537     ASSERT (__a_size >= 1);                                                \
3538     ASSERT (__b & 1);                                                      \
3539                                                                            \
3540     if ((GMP_NUMB_BITS % 2) != 0                                           \
3541         || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
3542       {                                                                    \
3543         (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
3544       }                                                                    \
3545     else                                                                   \
3546       {                                                                    \
3547         (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
3548         (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
3549       }                                                                    \
3550   } while (0)
3551
3552 /* Matrix multiplication */
3553 #define   mpn_matrix22_mul __MPN(matrix22_mul)
3554 void      mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3555 #define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
3556 void      mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3557 #define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
3558 mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
3559
3560 #ifndef MATRIX22_STRASSEN_THRESHOLD
3561 #define MATRIX22_STRASSEN_THRESHOLD 30
3562 #endif
3563
3564 /* HGCD definitions */
3565
3566 /* Extract one numb, shifting count bits left
3567     ________  ________
3568    |___xh___||___xl___|
3569           |____r____|
3570    >count <
3571
3572    The count includes any nail bits, so it should work fine if count
3573    is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
3574    xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
3575
3576    FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
3577    those calls where the count high bits of xh may be non-zero.
3578 */
3579
3580 #define MPN_EXTRACT_NUMB(count, xh, xl)                         \
3581   ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |      \
3582    ((xl) >> (GMP_LIMB_BITS - (count))))
3583
3584
3585 /* The matrix non-negative M = (u, u'; v,v') keeps track of the
3586    reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
3587    than a, b. The determinant must always be one, so that M has an
3588    inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
3589    bits. */
3590 struct hgcd_matrix1
3591 {
3592   mp_limb_t u[2][2];
3593 };
3594
3595 #define mpn_hgcd2 __MPN (hgcd2)
3596 int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
3597
3598 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
3599 mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3600
3601 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
3602 mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3603
3604 struct hgcd_matrix
3605 {
3606   mp_size_t alloc;              /* for sanity checking only */
3607   mp_size_t n;
3608   mp_ptr p[2][2];
3609 };
3610
3611 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3612
3613 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3614 void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3615
3616 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3617 void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3618
3619 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3620 mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3621
3622 #define mpn_hgcd_itch __MPN (hgcd_itch)
3623 mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3624
3625 #define mpn_hgcd __MPN (hgcd)
3626 mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3627
3628 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3629
3630 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3631 mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3632
3633 /* Needs storage for the quotient */
3634 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
3635
3636 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
3637 mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3638
3639 #define MPN_GCD_LEHMER_N_ITCH(n) (n)
3640
3641 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
3642 mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3643
3644 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
3645 mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
3646
3647 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
3648
3649 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
3650 mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3651
3652 /* 4*(an + 1) + 4*(bn + 1) + an */
3653 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
3654
3655 #ifndef HGCD_THRESHOLD
3656 #define HGCD_THRESHOLD 400
3657 #endif
3658
3659 #ifndef GCD_DC_THRESHOLD
3660 #define GCD_DC_THRESHOLD 1000
3661 #endif
3662
3663 #ifndef GCDEXT_DC_THRESHOLD
3664 #define GCDEXT_DC_THRESHOLD 600
3665 #endif
3666
3667 /* Definitions for mpn_set_str and mpn_get_str */
3668 struct powers
3669 {
3670   mp_ptr p;                     /* actual power value */
3671   mp_size_t n;                  /* # of limbs at p */
3672   mp_size_t shift;              /* weight of lowest limb, in limb base B */
3673   size_t digits_in_base;        /* number of corresponding digits */
3674   int base;
3675 };
3676 typedef struct powers powers_t;
3677 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
3678 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
3679 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
3680 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
3681
3682 #define   mpn_dc_set_str __MPN(dc_set_str)
3683 mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
3684 #define   mpn_bc_set_str __MPN(bc_set_str)
3685 mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
3686 #define   mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
3687 void      mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
3688
3689
3690 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3691    limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
3692    hence giving back the user's size in bits rounded up.  Notice that
3693    converting prec->bits->prec gives an unchanged value.  */
3694 #define __GMPF_BITS_TO_PREC(n)                                          \
3695   ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3696 #define __GMPF_PREC_TO_BITS(n) \
3697   ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
3698
3699 extern mp_size_t __gmp_default_fp_limb_precision;
3700
3701
3702 /* Set n to the number of significant digits an mpf of the given _mp_prec
3703    field, in the given base.  This is a rounded up value, designed to ensure
3704    there's enough digits to reproduce all the guaranteed part of the value.
3705
3706    There are prec many limbs, but the high might be only "1" so forget it
3707    and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
3708    further +1 is because the limbs usually won't fall on digit boundaries.
3709
3710    FIXME: If base is a power of 2 and the bits per digit divides
3711    BITS_PER_MP_LIMB then the +2 is unnecessary.  This happens always for
3712    base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3713
3714 #define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
3715   do {                                                                  \
3716     ASSERT (base >= 2 && base < numberof (mp_bases));                   \
3717     (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \
3718                         * mp_bases[(base)].chars_per_bit_exactly);      \
3719   } while (0)
3720
3721
3722 /* Decimal point string, from the current C locale.  Needs <langinfo.h> for
3723    nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
3724    DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3725    their respective #if HAVE_FOO_H.
3726
3727    GLIBC recommends nl_langinfo because getting only one facet can be
3728    faster, apparently. */
3729
3730 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3731 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3732 #define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
3733 #endif
3734 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3735 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3736 #define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
3737 #endif
3738 /* localeconv is slower since it returns all locale stuff */
3739 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3740 #define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
3741 #endif
3742 #if ! defined (GMP_DECIMAL_POINT)
3743 #define GMP_DECIMAL_POINT  (".")
3744 #endif
3745
3746
3747 #define DOPRNT_CONV_FIXED        1
3748 #define DOPRNT_CONV_SCIENTIFIC   2
3749 #define DOPRNT_CONV_GENERAL      3
3750
3751 #define DOPRNT_JUSTIFY_NONE      0
3752 #define DOPRNT_JUSTIFY_LEFT      1
3753 #define DOPRNT_JUSTIFY_RIGHT     2
3754 #define DOPRNT_JUSTIFY_INTERNAL  3
3755
3756 #define DOPRNT_SHOWBASE_YES      1
3757 #define DOPRNT_SHOWBASE_NO       2
3758 #define DOPRNT_SHOWBASE_NONZERO  3
3759
3760 struct doprnt_params_t {
3761   int         base;          /* negative for upper case */
3762   int         conv;          /* choices above */
3763   const char  *expfmt;       /* exponent format */
3764   int         exptimes4;     /* exponent multiply by 4 */
3765   char        fill;          /* character */
3766   int         justify;       /* choices above */
3767   int         prec;          /* prec field, or -1 for all digits */
3768   int         showbase;      /* choices above */
3769   int         showpoint;     /* if radix point always shown */
3770   int         showtrailing;  /* if trailing zeros wanted */
3771   char        sign;          /* '+', ' ', or '\0' */
3772   int         width;         /* width field */
3773 };
3774
3775 #if _GMP_H_HAVE_VA_LIST
3776
3777 typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
3778 typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
3779 typedef int (*doprnt_reps_t)   __GMP_PROTO ((void *, int, int));
3780 typedef int (*doprnt_final_t)  __GMP_PROTO ((void *));
3781
3782 struct doprnt_funs_t {
3783   doprnt_format_t  format;
3784   doprnt_memory_t  memory;
3785   doprnt_reps_t    reps;
3786   doprnt_final_t   final;   /* NULL if not required */
3787 };
3788
3789 extern const struct doprnt_funs_t  __gmp_fprintf_funs;
3790 extern const struct doprnt_funs_t  __gmp_sprintf_funs;
3791 extern const struct doprnt_funs_t  __gmp_snprintf_funs;
3792 extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
3793 extern const struct doprnt_funs_t  __gmp_ostream_funs;
3794
3795 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
3796    "size" of these have been written.  "alloc > size" is maintained, so
3797    there's room to store a '\0' at the end.  "result" is where the
3798    application wants the final block pointer.  */
3799 struct gmp_asprintf_t {
3800   char    **result;
3801   char    *buf;
3802   size_t  size;
3803   size_t  alloc;
3804 };
3805
3806 #define GMP_ASPRINTF_T_INIT(d, output)                          \
3807   do {                                                          \
3808     (d).result = (output);                                      \
3809     (d).alloc = 256;                                            \
3810     (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
3811     (d).size = 0;                                               \
3812   } while (0)
3813
3814 /* If a realloc is necessary, use twice the size actually required, so as to
3815    avoid repeated small reallocs.  */
3816 #define GMP_ASPRINTF_T_NEED(d, n)                                       \
3817   do {                                                                  \
3818     size_t  alloc, newsize, newalloc;                                   \
3819     ASSERT ((d)->alloc >= (d)->size + 1);                               \
3820                                                                         \
3821     alloc = (d)->alloc;                                                 \
3822     newsize = (d)->size + (n);                                          \
3823     if (alloc <= newsize)                                               \
3824       {                                                                 \
3825         newalloc = 2*newsize;                                           \
3826         (d)->alloc = newalloc;                                          \
3827         (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
3828                                                alloc, newalloc, char);  \
3829       }                                                                 \
3830   } while (0)
3831
3832 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
3833 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
3834 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
3835
3836 /* buf is where to write the next output, and size is how much space is left
3837    there.  If the application passed size==0 then that's what we'll have
3838    here, and nothing at all should be written.  */
3839 struct gmp_snprintf_t {
3840   char    *buf;
3841   size_t  size;
3842 };
3843
3844 /* Add the bytes printed by the call to the total retval, or bail out on an
3845    error.  */
3846 #define DOPRNT_ACCUMULATE(call) \
3847   do {                          \
3848     int  __ret;                 \
3849     __ret = call;               \
3850     if (__ret == -1)            \
3851       goto error;               \
3852     retval += __ret;            \
3853   } while (0)
3854 #define DOPRNT_ACCUMULATE_FUN(fun, params)      \
3855   do {                                          \
3856     ASSERT ((fun) != NULL);                     \
3857     DOPRNT_ACCUMULATE ((*(fun)) params);        \
3858   } while (0)
3859
3860 #define DOPRNT_FORMAT(fmt, ap)                          \
3861   DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
3862 #define DOPRNT_MEMORY(ptr, len)                                 \
3863   DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
3864 #define DOPRNT_REPS(c, n)                               \
3865   DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
3866
3867 #define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
3868
3869 #define DOPRNT_REPS_MAYBE(c, n) \
3870   do {                          \
3871     if ((n) != 0)               \
3872       DOPRNT_REPS (c, n);       \
3873   } while (0)
3874 #define DOPRNT_MEMORY_MAYBE(ptr, len)   \
3875   do {                                  \
3876     if ((len) != 0)                     \
3877       DOPRNT_MEMORY (ptr, len);         \
3878   } while (0)
3879
3880 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
3881 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
3882
3883 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
3884 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
3885
3886 int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
3887 #endif /* _GMP_H_HAVE_VA_LIST */
3888
3889
3890 typedef int (*gmp_doscan_scan_t)  __GMP_PROTO ((void *, const char *, ...));
3891 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
3892 typedef int (*gmp_doscan_get_t)   __GMP_PROTO ((void *));
3893 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
3894
3895 struct gmp_doscan_funs_t {
3896   gmp_doscan_scan_t   scan;
3897   gmp_doscan_step_t   step;
3898   gmp_doscan_get_t    get;
3899   gmp_doscan_unget_t  unget;
3900 };
3901 extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
3902 extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
3903
3904 #if _GMP_H_HAVE_VA_LIST
3905 int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *,
3906                           const char *, va_list));
3907 #endif
3908
3909
3910 /* For testing and debugging.  */
3911 #define MPZ_CHECK_FORMAT(z)                                     \
3912   do {                                                          \
3913     ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
3914     ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
3915     ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
3916   } while (0)
3917
3918 #define MPQ_CHECK_FORMAT(q)                             \
3919   do {                                                  \
3920     MPZ_CHECK_FORMAT (mpq_numref (q));                  \
3921     MPZ_CHECK_FORMAT (mpq_denref (q));                  \
3922     ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
3923                                                         \
3924     if (SIZ(mpq_numref(q)) == 0)                        \
3925       {                                                 \
3926         /* should have zero as 0/1 */                   \
3927         ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
3928                        && PTR(mpq_denref(q))[0] == 1);  \
3929       }                                                 \
3930     else                                                \
3931       {                                                 \
3932         /* should have no common factors */             \
3933         mpz_t  g;                                       \
3934         mpz_init (g);                                   \
3935         mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
3936         ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
3937         mpz_clear (g);                                  \
3938       }                                                 \
3939   } while (0)
3940
3941 #define MPF_CHECK_FORMAT(f)                             \
3942   do {                                                  \
3943     ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
3944     ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
3945     if (SIZ(f) == 0)                                    \
3946       ASSERT_ALWAYS (EXP(f) == 0);                      \
3947     if (SIZ(f) != 0)                                    \
3948       ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
3949   } while (0)
3950
3951
3952 #define MPZ_PROVOKE_REALLOC(z)                                  \
3953   do { ALLOC(z) = ABSIZ(z); } while (0)
3954
3955
3956 /* Enhancement: The "mod" and "gcd_1" functions below could have
3957    __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
3958    function pointers, only actual functions.  It probably doesn't make much
3959    difference to the gmp code, since hopefully we arrange calls so there's
3960    no great need for the compiler to move things around.  */
3961
3962 #if WANT_FAT_BINARY && HAVE_HOST_CPU_FAMILY_x86
3963 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
3964    in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
3965 struct cpuvec_t {
3966   DECL_add_n           ((*add_n));
3967   DECL_addmul_1        ((*addmul_1));
3968   DECL_copyd           ((*copyd));
3969   DECL_copyi           ((*copyi));
3970   DECL_divexact_1      ((*divexact_1));
3971   DECL_divexact_by3c   ((*divexact_by3c));
3972   DECL_divrem_1        ((*divrem_1));
3973   DECL_gcd_1           ((*gcd_1));
3974   DECL_lshift          ((*lshift));
3975   DECL_mod_1           ((*mod_1));
3976   DECL_mod_34lsub1     ((*mod_34lsub1));
3977   DECL_modexact_1c_odd ((*modexact_1c_odd));
3978   DECL_mul_1           ((*mul_1));
3979   DECL_mul_basecase    ((*mul_basecase));
3980   DECL_preinv_divrem_1 ((*preinv_divrem_1));
3981   DECL_preinv_mod_1    ((*preinv_mod_1));
3982   DECL_rshift          ((*rshift));
3983   DECL_sqr_basecase    ((*sqr_basecase));
3984   DECL_sub_n           ((*sub_n));
3985   DECL_submul_1        ((*submul_1));
3986   int                  initialized;
3987   mp_size_t            mul_karatsuba_threshold;
3988   mp_size_t            mul_toom3_threshold;
3989   mp_size_t            sqr_karatsuba_threshold;
3990   mp_size_t            sqr_toom3_threshold;
3991 };
3992 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
3993 #endif /* x86 fat binary */
3994
3995 void __gmpn_cpuvec_init __GMP_PROTO ((void));
3996
3997 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
3998    if that hasn't yet been done (to establish the right values).  */
3999 #define CPUVEC_THRESHOLD(field)                                               \
4000   ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
4001    __gmpn_cpuvec.field)
4002
4003
4004 #if HAVE_NATIVE_mpn_add_nc
4005 #define mpn_add_nc __MPN(add_nc)
4006 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4007 #else
4008 static inline
4009 mp_limb_t
4010 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4011 {
4012   mp_limb_t co;
4013   co = mpn_add_n (rp, up, vp, n);
4014   co += mpn_add_1 (rp, rp, n, ci);
4015   return co;
4016 }
4017 #endif
4018
4019 #if HAVE_NATIVE_mpn_sub_nc
4020 #define mpn_sub_nc __MPN(sub_nc)
4021 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4022 #else
4023 static inline mp_limb_t
4024 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4025 {
4026   mp_limb_t co;
4027   co = mpn_sub_n (rp, up, vp, n);
4028   co += mpn_sub_1 (rp, rp, n, ci);
4029   return co;
4030 }
4031 #endif
4032
4033 static inline int
4034 mpn_zero_p (mp_srcptr ap, mp_size_t n)
4035 {
4036   mp_size_t i;
4037   for (i = n - 1; i >= 0; i--)
4038     {
4039       if (ap[i] != 0)
4040         return 0;
4041     }
4042   return 1;
4043 }
4044
4045 #if TUNE_PROGRAM_BUILD
4046 /* Some extras wanted when recompiling some .c files for use by the tune
4047    program.  Not part of a normal build.
4048
4049    It's necessary to keep these thresholds as #defines (just to an
4050    identically named variable), since various defaults are established based
4051    on #ifdef in the .c files.  For some this is not so (the defaults are
4052    instead established above), but all are done this way for consistency. */
4053
4054 #undef  MUL_KARATSUBA_THRESHOLD
4055 #define MUL_KARATSUBA_THRESHOLD      mul_karatsuba_threshold
4056 extern mp_size_t                     mul_karatsuba_threshold;
4057
4058 #undef  MUL_TOOM3_THRESHOLD
4059 #define MUL_TOOM3_THRESHOLD          mul_toom3_threshold
4060 extern mp_size_t                     mul_toom3_threshold;
4061
4062 #undef  MUL_TOOM44_THRESHOLD
4063 #define MUL_TOOM44_THRESHOLD         mul_toom44_threshold
4064 extern mp_size_t                     mul_toom44_threshold;
4065
4066 #undef  MUL_FFT_THRESHOLD
4067 #define MUL_FFT_THRESHOLD            mul_fft_threshold
4068 extern mp_size_t                     mul_fft_threshold;
4069
4070 #undef  MUL_FFT_MODF_THRESHOLD
4071 #define MUL_FFT_MODF_THRESHOLD       mul_fft_modf_threshold
4072 extern mp_size_t                     mul_fft_modf_threshold;
4073
4074 #undef  MUL_FFT_TABLE
4075 #define MUL_FFT_TABLE                { 0 }
4076
4077 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4078    remain as zero (always use it). */
4079 #if ! HAVE_NATIVE_mpn_sqr_basecase
4080 #undef  SQR_BASECASE_THRESHOLD
4081 #define SQR_BASECASE_THRESHOLD       sqr_basecase_threshold
4082 extern mp_size_t                     sqr_basecase_threshold;
4083 #endif
4084
4085 #if TUNE_PROGRAM_BUILD_SQR
4086 #undef  SQR_KARATSUBA_THRESHOLD
4087 #define SQR_KARATSUBA_THRESHOLD      SQR_KARATSUBA_MAX_GENERIC
4088 #else
4089 #undef  SQR_KARATSUBA_THRESHOLD
4090 #define SQR_KARATSUBA_THRESHOLD      sqr_karatsuba_threshold
4091 extern mp_size_t                     sqr_karatsuba_threshold;
4092 #endif
4093
4094 #undef  SQR_TOOM3_THRESHOLD
4095 #define SQR_TOOM3_THRESHOLD          sqr_toom3_threshold
4096 extern mp_size_t                     sqr_toom3_threshold;
4097
4098 #undef  SQR_TOOM4_THRESHOLD
4099 #define SQR_TOOM4_THRESHOLD          sqr_toom4_threshold
4100 extern mp_size_t                     sqr_toom4_threshold;
4101
4102 #undef SQR_FFT_THRESHOLD
4103 #define SQR_FFT_THRESHOLD            sqr_fft_threshold
4104 extern mp_size_t                     sqr_fft_threshold;
4105
4106 #undef SQR_FFT_MODF_THRESHOLD
4107 #define SQR_FFT_MODF_THRESHOLD       sqr_fft_modf_threshold
4108 extern mp_size_t                     sqr_fft_modf_threshold;
4109
4110 #undef  SQR_FFT_TABLE
4111 #define SQR_FFT_TABLE                { 0 }
4112
4113 #undef  MULLOW_BASECASE_THRESHOLD
4114 #define MULLOW_BASECASE_THRESHOLD    mullow_basecase_threshold
4115 extern mp_size_t                     mullow_basecase_threshold;
4116
4117 #undef  MULLOW_DC_THRESHOLD
4118 #define MULLOW_DC_THRESHOLD          mullow_dc_threshold
4119 extern mp_size_t                     mullow_dc_threshold;
4120
4121 #undef  MULLOW_MUL_N_THRESHOLD
4122 #define MULLOW_MUL_N_THRESHOLD       mullow_mul_n_threshold
4123 extern mp_size_t                     mullow_mul_n_threshold;
4124
4125
4126 #if ! UDIV_PREINV_ALWAYS
4127 #undef  DIV_SB_PREINV_THRESHOLD
4128 #define DIV_SB_PREINV_THRESHOLD      div_sb_preinv_threshold
4129 extern mp_size_t                     div_sb_preinv_threshold;
4130 #endif
4131
4132 #undef  DIV_DC_THRESHOLD
4133 #define DIV_DC_THRESHOLD             div_dc_threshold
4134 extern mp_size_t                     div_dc_threshold;
4135
4136 #undef  POWM_THRESHOLD
4137 #define POWM_THRESHOLD               powm_threshold
4138 extern mp_size_t                     powm_threshold;
4139
4140 #undef  MATRIX22_STRASSEN_THRESHOLD
4141 #define MATRIX22_STRASSEN_THRESHOLD  matrix22_strassen_threshold
4142 extern mp_size_t                     matrix22_strassen_threshold;
4143
4144 #undef  HGCD_THRESHOLD
4145 #define HGCD_THRESHOLD               hgcd_threshold
4146 extern mp_size_t                     hgcd_threshold;
4147
4148 #undef  GCD_ACCEL_THRESHOLD
4149 #define GCD_ACCEL_THRESHOLD          gcd_accel_threshold
4150 extern mp_size_t                     gcd_accel_threshold;
4151
4152 #undef  GCD_DC_THRESHOLD
4153 #define GCD_DC_THRESHOLD             gcd_dc_threshold
4154 extern mp_size_t                     gcd_dc_threshold;
4155
4156 #undef GCDEXT_DC_THRESHOLD
4157 #define GCDEXT_DC_THRESHOLD          gcdext_dc_threshold
4158 extern mp_size_t                     gcdext_dc_threshold;
4159
4160 #undef DIVREM_1_NORM_THRESHOLD
4161 #define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold
4162 extern mp_size_t                     divrem_1_norm_threshold;
4163
4164 #undef DIVREM_1_UNNORM_THRESHOLD
4165 #define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold
4166 extern mp_size_t                     divrem_1_unnorm_threshold;
4167
4168 #undef MOD_1_NORM_THRESHOLD
4169 #define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold
4170 extern mp_size_t                     mod_1_norm_threshold;
4171
4172 #undef MOD_1_UNNORM_THRESHOLD
4173 #define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold
4174 extern mp_size_t                     mod_1_unnorm_threshold;
4175
4176 #undef MOD_1_1_THRESHOLD
4177 #define MOD_1_1_THRESHOLD            mod_1_1_threshold
4178 extern mp_size_t                     mod_1_1_threshold;
4179
4180 #undef MOD_1_2_THRESHOLD
4181 #define MOD_1_2_THRESHOLD            mod_1_2_threshold
4182 extern mp_size_t                     mod_1_2_threshold;
4183
4184 #undef MOD_1_3_THRESHOLD
4185 #define MOD_1_3_THRESHOLD            mod_1_3_threshold
4186 extern mp_size_t                     mod_1_3_threshold;
4187
4188 #undef MOD_1_4_THRESHOLD
4189 #define MOD_1_4_THRESHOLD            mod_1_4_threshold
4190 extern mp_size_t                     mod_1_4_threshold;
4191
4192 #if ! UDIV_PREINV_ALWAYS
4193 #undef  DIVREM_2_THRESHOLD
4194 #define DIVREM_2_THRESHOLD           divrem_2_threshold
4195 extern mp_size_t                     divrem_2_threshold;
4196 #endif
4197
4198 #undef  GET_STR_DC_THRESHOLD
4199 #define GET_STR_DC_THRESHOLD         get_str_dc_threshold
4200 extern mp_size_t                     get_str_dc_threshold;
4201
4202 #undef GET_STR_PRECOMPUTE_THRESHOLD
4203 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
4204 extern mp_size_t                     get_str_precompute_threshold;
4205
4206 #undef  SET_STR_DC_THRESHOLD
4207 #define SET_STR_DC_THRESHOLD         set_str_dc_threshold
4208 extern mp_size_t                     set_str_dc_threshold;
4209
4210 #undef SET_STR_PRECOMPUTE_THRESHOLD
4211 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
4212 extern mp_size_t                     set_str_precompute_threshold;
4213
4214 #undef  SET_STR_THRESHOLD
4215 #define SET_STR_THRESHOLD            set_str_threshold
4216 extern mp_size_t                     SET_STR_THRESHOLD;
4217
4218 #undef  FFT_TABLE_ATTRS
4219 #define FFT_TABLE_ATTRS
4220 extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
4221
4222 /* Sizes the tune program tests up to, used in a couple of recompilations. */
4223 #undef MUL_KARATSUBA_THRESHOLD_LIMIT
4224 #undef MUL_TOOM3_THRESHOLD_LIMIT
4225 #undef MULLOW_BASECASE_THRESHOLD_LIMIT
4226 #undef SQR_TOOM3_THRESHOLD_LIMIT
4227 #define SQR_KARATSUBA_MAX_GENERIC       200
4228 #define MUL_KARATSUBA_THRESHOLD_LIMIT   700
4229 #define MUL_TOOM3_THRESHOLD_LIMIT       700
4230 #define SQR_TOOM3_THRESHOLD_LIMIT       400
4231 #define MUL_TOOM44_THRESHOLD_LIMIT     1000
4232 #define SQR_TOOM4_THRESHOLD_LIMIT      1000
4233 #define MULLOW_BASECASE_THRESHOLD_LIMIT 200
4234 #define GET_STR_THRESHOLD_LIMIT         150
4235
4236 /* "thresh" will normally be a variable when tuning, so use the cached
4237    result.  This helps mpn_sb_divrem_mn for instance.  */
4238 #undef  CACHED_ABOVE_THRESHOLD
4239 #define CACHED_ABOVE_THRESHOLD(cache, thresh)  (cache)
4240 #undef  CACHED_BELOW_THRESHOLD
4241 #define CACHED_BELOW_THRESHOLD(cache, thresh)  (cache)
4242
4243 #endif /* TUNE_PROGRAM_BUILD */
4244
4245 #if defined (__cplusplus)
4246 }
4247 #endif
4248
4249
4250 #ifdef __cplusplus
4251
4252 /* A little helper for a null-terminated __gmp_allocate_func string.
4253    The destructor ensures it's freed even if an exception is thrown.
4254    The len field is needed by the destructor, and can be used by anyone else
4255    to avoid a second strlen pass over the data.
4256
4257    Since our input is a C string, using strlen is correct.  Perhaps it'd be
4258    more C++-ish style to use std::char_traits<char>::length, but char_traits
4259    isn't available in gcc 2.95.4.  */
4260
4261 class gmp_allocated_string {
4262  public:
4263   char *str;
4264   size_t len;
4265   gmp_allocated_string(char *arg)
4266   {
4267     str = arg;
4268     len = std::strlen (str);
4269   }
4270   ~gmp_allocated_string()
4271   {
4272     (*__gmp_free_func) (str, len+1);
4273   }
4274 };
4275
4276 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
4277 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
4278 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
4279 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
4280 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
4281 extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
4282
4283 #endif /* __cplusplus */
4284
4285 #endif /* __GMP_IMPL_H__ */