1 /* Include file for internal GNU MP types and definitions.
3 THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
7 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 /* __GMP_DECLSPEC must be given on any global data that will be accessed
26 from outside libgmp, meaning from the test or development programs, or
27 from libgmpxx. Failing to do this will result in an incorrect address
28 being used for the accesses. On functions __GMP_DECLSPEC makes calls
from outside libgmp more efficient, but they'll still work fine without
it. */
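/* For example, a table exported for use by the test programs is declared
   with __GMP_DECLSPEC (this particular declaration appears further down in
   this file):

     __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
*/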
33 #ifndef __GMP_IMPL_H__
34 #define __GMP_IMPL_H__
#if defined (_CRAY)
#include <intrinsics.h> /* for _popcnt */
#endif
40 /* limits.h is not used in general, since it's an ANSI-ism, and since on
41 solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
42 values (the ABI=64 values).
44 On Cray vector systems, however, we need the system limits.h since sizes
45 of signed and unsigned types can differ there, depending on compiler
46 options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail. For
47 reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
48 short can be 24, 32, 46 or 64 bits, and different for ushort. */
54 /* For fat.h and other fat binary stuff.
55 No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
56 declared this way are only used to set function pointers in __gmp_cpuvec,
57 they're not called directly. */
58 #define DECL_add_n(name) \
59 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
60 #define DECL_addmul_1(name) \
61 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
62 #define DECL_copyd(name) \
63 void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
64 #define DECL_copyi(name) \
66 #define DECL_divexact_1(name) \
67 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
68 #define DECL_divexact_by3c(name) \
69 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
70 #define DECL_divrem_1(name) \
71 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
72 #define DECL_gcd_1(name) \
73 mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
74 #define DECL_lshift(name) \
75 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
76 #define DECL_mod_1(name) \
77 mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
78 #define DECL_mod_34lsub1(name) \
79 mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
80 #define DECL_modexact_1c_odd(name) \
81 mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_mul_1(name) \
DECL_addmul_1 (name)
84 #define DECL_mul_basecase(name) \
85 void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
86 #define DECL_preinv_divrem_1(name) \
87 mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
88 #define DECL_preinv_mod_1(name) \
89 mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_rshift(name) \
DECL_lshift (name)
92 #define DECL_sqr_basecase(name) \
93 void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_sub_n(name) \
DECL_add_n (name)
#define DECL_submul_1(name) \
DECL_addmul_1 (name)
99 #if ! __GMP_WITHIN_CONFIGURE
101 #include "gmp-mparam.h"
102 #include "fib_table.h"
103 #include "mp_bases.h"
109 #if HAVE_INTTYPES_H /* for uint_least32_t */
110 # include <inttypes.h>
#ifdef __cplusplus
#include <cstring> /* for strlen */
#include <string> /* for std::string */
#endif
123 #ifndef WANT_TMP_DEBUG /* for TMP_ALLOC_LIMBS_2 and others */
124 #define WANT_TMP_DEBUG 0
127 /* The following tries to get a good version of alloca. The tests are
128 adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
129 Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
130 be setup appropriately.
132 ifndef alloca - a cpp define might already exist.
133 glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
134 HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
136 GCC __builtin_alloca - preferred whenever available.
138 _AIX pragma - IBM compilers need a #pragma in "each module that needs to
139 use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
140 used in past versions of GMP, retained still in case it matters.
142 The autoconf manual says this pragma needs to be at the start of a C
143 file, apart from comments and preprocessor directives. Is that true?
xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
from gmp.h. */
#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# elif defined (__DECC)
#  define alloca(x) __ALLOCA(x)
# elif defined (_MSC_VER)
#  define alloca _alloca
# elif defined (_AIX) || defined (_IBMR2)
 #pragma alloca
# endif
#endif
174 /* if not provided by gmp-mparam.h */
175 #ifndef BYTES_PER_MP_LIMB
176 #define BYTES_PER_MP_LIMB SIZEOF_MP_LIMB_T
178 #ifndef BITS_PER_MP_LIMB
179 #define BITS_PER_MP_LIMB (8 * SIZEOF_MP_LIMB_T)
182 #define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)
185 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned gmp_uint_least32_t;
#else
typedef unsigned long gmp_uint_least32_t;
#endif
#endif
#endif
201 /* const and signed must match __gmp_const and __gmp_signed, so follow the
202 decision made for those in gmp.h. */
203 #if ! __GMP_HAVE_CONST
204 #define const /* empty */
205 #define signed /* empty */
208 /* "const" basically means a function does nothing but examine its arguments
209 and give a return value, it doesn't read or write any memory (neither
210 global nor pointed to by arguments), and has no other side-effects. This
211 is more restrictive than "pure". See info node "(gcc)Function
212 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
213 this off when trying to write timing loops. */
214 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
215 #define ATTRIBUTE_CONST __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif
220 #if HAVE_ATTRIBUTE_NORETURN
221 #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif
226 /* "malloc" means a function behaves like malloc in that the pointer it
227 returns doesn't alias anything. */
228 #if HAVE_ATTRIBUTE_MALLOC
229 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif
#if ! HAVE_STRCHR
#define strchr(s,c) index(s,c)
#endif
#if ! HAVE_MEMSET
#define memset(p, c, n) \
  do { \
    char *__memset__p = (p); \
    int __i; \
    for (__i = 0; __i < (n); __i++) \
      __memset__p[__i] = (c); \
  } while (0)
#endif
250 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
251 mode. Falling back to a memcpy will give maximum portability, since it
252 works no matter whether va_list is a pointer, struct or array. */
253 #if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src) __va_copy(dst,src)
#endif
256 #if ! defined (va_copy)
257 #define va_copy(dst,src) \
do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif
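/* An illustrative use of va_copy, as a sketch only ("fmt" and the scanning
   are hypothetical):

     va_list ap, ap2;
     va_start (ap, fmt);
     va_copy (ap2, ap);      ...scan the arguments once with ap2...
     va_end (ap2);
     ...then consume ap as usual and va_end (ap)...
*/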
262 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
263 (ie. ctlz, ctpop, cttz). */
264 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
265 || HAVE_HOST_CPU_alphaev7
266 #define HAVE_HOST_CPU_alpha_CIX 1
#if defined (__cplusplus)
extern "C" {
#endif
/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC. Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.
286 TMP_DECL just declares a variable, but might be empty and so must be last
287 in a list of variables. TMP_MARK must be done before any TMP_ALLOC.
288 TMP_ALLOC(0) is not allowed. TMP_FREE doesn't need to be done if a
289 TMP_MARK was made, but then no TMP_ALLOCs. */
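/* A minimal sketch of the usual pattern ("foo" is a hypothetical function;
   the limb variants like TMP_ALLOC_LIMBS are defined below):

     static void
     foo (mp_ptr rp, mp_srcptr ap, mp_size_t n)
     {
       mp_ptr tp;
       TMP_DECL;
       TMP_MARK;
       tp = TMP_ALLOC_LIMBS (2 * n);
       ...use tp as scratch...
       TMP_FREE;
     }
*/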
291 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
292 __gmp_allocate_func doesn't already determine it. Currently TMP_ALLOC
293 isn't used for "double"s, so that's not in the union. */
union tmp_align_t {
  mp_size_t s;
  char *p;
};
#define __TMP_ALIGN sizeof (union tmp_align_t)
300 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
301 "a" must be an unsigned type.
302 This is designed for use with a compile-time constant "m".
303 The POW2 case is expected to be usual, and gcc 3.0 and up recognises
304 "(-(8*n))%8" or the like is always zero, which means the rounding up in
305 the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
306 #define ROUND_UP_MULTIPLE(a,m) \
307 (POW2_P(m) ? (a) + (-(a))%(m) \
308 : (a)+(m)-1 - (((a)+(m)-1) % (m)))
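/* For instance, with "a" unsigned, ROUND_UP_MULTIPLE (13, 8) takes the POW2
   branch and gives 13 + (-13)%8 = 13 + 3 = 16, since -13 is 3 mod 8 in
   unsigned arithmetic. ROUND_UP_MULTIPLE (13, 6) takes the general branch
   and gives 13+5 - ((13+5) % 6) = 18 - 0 = 18. */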
310 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
311 struct tmp_reentrant_t {
312 struct tmp_reentrant_t *next;
size_t size; /* bytes, including header */
};
315 void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
void __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
#endif
#if WANT_TMP_ALLOCA
#define TMP_DECL struct tmp_reentrant_t *__tmp_marker
323 #define TMP_MARK __tmp_marker = 0
324 #define TMP_SALLOC(n) alloca(n)
325 #define TMP_BALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
326 #define TMP_ALLOC(n) \
327 (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_FREE \
  do { \
    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif
335 #if WANT_TMP_REENTRANT
336 #define TMP_SDECL TMP_DECL
337 #define TMP_DECL struct tmp_reentrant_t *__tmp_marker
338 #define TMP_SMARK TMP_MARK
339 #define TMP_MARK __tmp_marker = 0
340 #define TMP_SALLOC(n) TMP_ALLOC(n)
341 #define TMP_BALLOC(n) TMP_ALLOC(n)
342 #define TMP_ALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
343 #define TMP_SFREE TMP_FREE
#define TMP_FREE __gmp_tmp_reentrant_free (__tmp_marker)
#endif
347 #if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
353 void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
354 void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
355 void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
356 #define TMP_SDECL TMP_DECL
357 #define TMP_DECL struct tmp_marker __tmp_marker
358 #define TMP_SMARK TMP_MARK
359 #define TMP_MARK __gmp_tmp_mark (&__tmp_marker)
360 #define TMP_SALLOC(n) TMP_ALLOC(n)
361 #define TMP_BALLOC(n) TMP_ALLOC(n)
362 #define TMP_ALLOC(n) \
363 __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
364 #define TMP_SFREE TMP_FREE
#define TMP_FREE __gmp_tmp_free (&__tmp_marker)
#endif
#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t *list;
  const char *file;
  int line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t *next;
  char *block;
  size_t size;
};
380 void __gmp_tmp_debug_mark __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
381 struct tmp_debug_t *,
382 const char *, const char *));
383 void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
384 struct tmp_debug_t **, const char *,
385 size_t)) ATTRIBUTE_MALLOC;
386 void __gmp_tmp_debug_free __GMP_PROTO ((const char *, int, int,
387 struct tmp_debug_t **,
388 const char *, const char *));
389 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
390 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
391 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
392 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
393 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
394 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
395 /* The marker variable is designed to provoke an uninitialized variable
396 warning from the compiler if TMP_FREE is used without a TMP_MARK.
397 __tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
398 these things up too. */
399 #define TMP_DECL_NAME(marker, marker_name) \
401 int __tmp_marker_inscope; \
402 const char *__tmp_marker_name = marker_name; \
403 struct tmp_debug_t __tmp_marker_struct; \
404 /* don't demand NULL, just cast a zero */ \
405 struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name) \
  do { \
    __tmp_marker_inscope = 1; \
    __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
                          &__tmp_marker, &__tmp_marker_struct, \
                          __tmp_marker_name, marker_name); \
  } while (0)
414 #define TMP_SALLOC(n) TMP_ALLOC(n)
415 #define TMP_BALLOC(n) TMP_ALLOC(n)
416 #define TMP_ALLOC(size) \
417 __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
418 __tmp_marker_inscope, \
419 &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name) \
  do { \
    __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
                          marker, &__tmp_marker, \
                          __tmp_marker_name, marker_name); \
  } while (0)
426 #endif /* WANT_TMP_DEBUG */
429 /* Allocating various types. */
430 #define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
431 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
432 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
433 #define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
434 #define TMP_SALLOC_LIMBS(n) TMP_SALLOC_TYPE(n,mp_limb_t)
435 #define TMP_BALLOC_LIMBS(n) TMP_BALLOC_TYPE(n,mp_limb_t)
436 #define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
437 #define TMP_SALLOC_MP_PTRS(n) TMP_SALLOC_TYPE(n,mp_ptr)
438 #define TMP_BALLOC_MP_PTRS(n) TMP_BALLOC_TYPE(n,mp_ptr)
440 /* It's more efficient to allocate one block than two. This is certainly
441 true of the malloc methods, but it can even be true of alloca if that
442 involves copying a chunk of stack (various RISCs), or a call to a stack
443 bounds check (mingw). In any case, when debugging keep separate blocks
444 so a redzoning malloc debugger can protect each individually. */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
  do { \
    if (WANT_TMP_DEBUG) \
      { \
        (xp) = TMP_ALLOC_LIMBS (xsize); \
        (yp) = TMP_ALLOC_LIMBS (ysize); \
      } \
    else \
      { \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
        (yp) = (xp) + (xsize); \
      } \
  } while (0)
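/* For instance, TMP_ALLOC_LIMBS_2 (xp,n, yp,(n)+1); gets {xp,n} and
   {yp,n+1} in a single allocation when not debugging. */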
460 /* From gmp.h, nicer names for internal use. */
461 #define CRAY_Pragma(str) __GMP_CRAY_Pragma(str)
462 #define MPN_CMP(result, xp, yp, size) __GMPN_CMP(result, xp, yp, size)
463 #define LIKELY(cond) __GMP_LIKELY(cond)
464 #define UNLIKELY(cond) __GMP_UNLIKELY(cond)
466 #define ABS(x) ((x) >= 0 ? (x) : -(x))
468 #define MIN(l,o) ((l) < (o) ? (l) : (o))
470 #define MAX(h,i) ((h) > (i) ? (h) : (i))
471 #define numberof(x) (sizeof (x) / sizeof ((x)[0]))
473 /* Field access macros. */
474 #define SIZ(x) ((x)->_mp_size)
475 #define ABSIZ(x) ABS (SIZ (x))
476 #define PTR(x) ((x)->_mp_d)
477 #define LIMBS(x) ((x)->_mp_d)
478 #define EXP(x) ((x)->_mp_exp)
479 #define PREC(x) ((x)->_mp_prec)
480 #define ALLOC(x) ((x)->_mp_alloc)
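/* For example, an mpz_t holding the value -5 (with GMP_NUMB_BITS >= 3) has
   SIZ(z) == -1, ABSIZ(z) == 1 and PTR(z)[0] == 5; SIZ(z) == 0 represents
   the value 0. */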
482 /* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
483 then that lowest one bit must have been the only bit set. n==0 will
484 return true though, so avoid that. */
485 #define POW2_P(n) (((n) & ((n) - 1)) == 0)
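/* E.g. POW2_P(8) is 1 since 8&7 == 0, and POW2_P(6) is 0 since 6&5 == 4.
   Note POW2_P(0) is 1 too, hence the warning above. */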
/* The "short" defines are a bit different because shorts are promoted to
   ints by "~" or "<<" etc.
491 #ifndef's are used since on some systems (HP?) header files other than
492 limits.h setup these defines. We could forcibly #undef in that case, but
493 there seems no need to worry about that. */
#ifndef ULONG_MAX
#define ULONG_MAX __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX __GMP_USHRT_MAX
#endif
504 #define MP_LIMB_T_MAX (~ (mp_limb_t) 0)
506 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
507 unsigned on a K&R compiler. In particular the HP-UX 10 bundled K&R cc
508 treats the plain decimal values in <limits.h> as signed. */
509 #define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
510 #define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
511 #define USHRT_HIGHBIT ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
512 #define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
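/* For instance, with a 32-bit mp_limb_t, MP_LIMB_T_MAX is 0xFFFFFFFF and
   GMP_LIMB_HIGHBIT comes out as 0xFFFFFFFF ^ 0x7FFFFFFF = 0x80000000. */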
#ifndef LONG_MIN
#define LONG_MIN ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX (-(LONG_MIN+1))
#endif
#ifndef INT_MIN
#define INT_MIN ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX (-(INT_MIN+1))
#endif
#ifndef SHRT_MIN
#define SHRT_MIN ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX ((short) (-(SHRT_MIN+1)))
#endif
535 #if __GMP_MP_SIZE_T_INT
536 #define MP_SIZE_T_MAX INT_MAX
537 #define MP_SIZE_T_MIN INT_MIN
#else
#define MP_SIZE_T_MAX LONG_MAX
#define MP_SIZE_T_MIN LONG_MIN
#endif
543 /* mp_exp_t is the same as mp_size_t */
544 #define MP_EXP_T_MAX MP_SIZE_T_MAX
545 #define MP_EXP_T_MIN MP_SIZE_T_MIN
547 #define LONG_HIGHBIT LONG_MIN
548 #define INT_HIGHBIT INT_MIN
549 #define SHRT_HIGHBIT SHRT_MIN
552 #define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
554 #if GMP_NAIL_BITS == 0
555 #define GMP_NAIL_LOWBIT CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif
560 #if GMP_NAIL_BITS != 0
561 /* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
562 code that has not yet been qualified. */
564 #undef DIV_SB_PREINV_THRESHOLD
565 #undef DIV_DC_THRESHOLD
566 #undef POWM_THRESHOLD
567 #define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX
568 #define DIV_DC_THRESHOLD 50
569 #define POWM_THRESHOLD 0
571 #undef GCD_ACCEL_THRESHOLD
572 #define GCD_ACCEL_THRESHOLD 3
574 #undef DIVREM_1_NORM_THRESHOLD
575 #undef DIVREM_1_UNNORM_THRESHOLD
576 #undef MOD_1_NORM_THRESHOLD
577 #undef MOD_1_UNNORM_THRESHOLD
578 #undef USE_PREINV_DIVREM_1
579 #undef USE_PREINV_MOD_1
580 #undef DIVREM_2_THRESHOLD
581 #undef DIVEXACT_1_THRESHOLD
582 #undef MODEXACT_1_ODD_THRESHOLD
583 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
584 #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
585 #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
586 #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
587 #define USE_PREINV_DIVREM_1 0 /* no preinv */
588 #define USE_PREINV_MOD_1 0 /* no preinv */
589 #define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */
591 /* mpn/generic/mul_fft.c is not nails-capable. */
592 #undef MUL_FFT_THRESHOLD
593 #undef SQR_FFT_THRESHOLD
594 #define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
595 #define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
#define MP_LIMB_T_SWAP(x, y) \
  do { \
    mp_limb_t __mp_limb_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_limb_t_swap__tmp; \
  } while (0)
#define MP_SIZE_T_SWAP(x, y) \
  do { \
    mp_size_t __mp_size_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_size_t_swap__tmp; \
  } while (0)

#define MP_PTR_SWAP(x, y) \
  do { \
    mp_ptr __mp_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_ptr_swap__tmp; \
  } while (0)
#define MP_SRCPTR_SWAP(x, y) \
  do { \
    mp_srcptr __mp_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_srcptr_swap__tmp; \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_PTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_SRCPTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)

#define MPZ_PTR_SWAP(x, y) \
  do { \
    mpz_ptr __mpz_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_ptr_swap__tmp; \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y) \
  do { \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_srcptr_swap__tmp; \
  } while (0)
651 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
652 but current gcc (3.0) doesn't seem to support that. */
653 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
654 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
655 __GMP_DECLSPEC extern void (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
657 void *__gmp_default_allocate __GMP_PROTO ((size_t));
658 void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
659 void __gmp_default_free __GMP_PROTO ((void *, size_t));
661 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
662 ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
663 #define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
665 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
666 ((type *) (*__gmp_reallocate_func) \
667 (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
668 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
669 __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
671 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
672 #define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (type *) (*__gmp_reallocate_func) \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
  } while (0)
688 /* Dummy for non-gcc, code involving it will go dead. */
689 #if ! defined (__GNUC__) || __GNUC__ < 2
690 #define __builtin_constant_p(x) 0
694 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
695 stack usage is compatible. __attribute__ ((regparm (N))) helps by
696 putting leading parameters in registers, avoiding extra stack.
698 regparm cannot be used with calls going through the PLT, because the
699 binding code there may clobber the registers (%eax, %edx, %ecx) used for
700 the regparm parameters. Calls to local (ie. static) functions could
701 still use this, if we cared to differentiate locals and globals.
703 On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
704 -p or -pg profiling, since that version of gcc doesn't realize the
705 .mcount calls will clobber the parameter registers. Other systems are
706 ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
707 bother to try to detect this. regparm is only an optimization so we just
708 disable it when profiling (profiling being a slowdown anyway). */
710 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
711 && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
712 #define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif
717 /* Macros for altering parameter order according to regparm usage. */
718 #if USE_LEADING_REGPARM
719 #define REGPARM_2_1(a,b,x) x,a,b
720 #define REGPARM_3_1(a,b,c,x) x,a,b,c
721 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x) a,b,x
#define REGPARM_3_1(a,b,c,x) a,b,c,x
#define REGPARM_ATTR(n)
#endif
729 /* ASM_L gives a local label for a gcc asm block, for use when temporary
730 local labels like "1:" might not be available, which is the case for
731 instance on the x86s (the SCO assembler doesn't support them).
733 The label generated is made unique by including "%=" which is a unique
734 number for each insn. This ensures the same name can be used in multiple
735 asm blocks, perhaps via a macro. Since jumps between asm blocks are not
allowed there's no need for a label to be usable outside a single
asm block. */
739 #define ASM_L(name) LSYM_PREFIX "asm_%=_" #name
742 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
745 FIXME: Need a cld after each std.
746 FIXME: Can't have inputs in clobbered registers, must describe them as
747 dummy outputs, and add volatile. */
748 #define MPN_COPY_INCR(DST, SRC, N) \
749 __asm__ ("cld\n\trep\n\tmovsl" : : \
750 "D" (DST), "S" (SRC), "c" (N) : \
751 "cx", "di", "si", "memory")
752 #define MPN_COPY_DECR(DST, SRC, N) \
753 __asm__ ("std\n\trep\n\tmovsl" : : \
754 "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
755 "cx", "di", "si", "memory")
760 void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
761 #define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
763 #define mpz_n_pow_ui __gmpz_n_pow_ui
764 void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
767 #define mpn_addmul_1c __MPN(addmul_1c)
768 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
770 #define mpn_addmul_2 __MPN(addmul_2)
771 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
773 #define mpn_addmul_3 __MPN(addmul_3)
774 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
776 #define mpn_addmul_4 __MPN(addmul_4)
777 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
779 #define mpn_addmul_5 __MPN(addmul_5)
780 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
782 #define mpn_addmul_6 __MPN(addmul_6)
783 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
785 #define mpn_addmul_7 __MPN(addmul_7)
786 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
788 #define mpn_addmul_8 __MPN(addmul_8)
789 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
791 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
792 returns the carry out (0, 1 or 2). */
793 #define mpn_addlsh1_n __MPN(addlsh1_n)
794 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
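/* As a small worked case: with n == 1, {a,1} == {3} and {b,1} == {7},
   mpn_addlsh1_n would set c[0] to 3 + 2*7 = 17 and return 0, for any numb
   size where 17 fits. */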
796 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
797 returns the borrow out (0, 1 or 2). */
798 #define mpn_sublsh1_n __MPN(sublsh1_n)
799 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
801 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
802 and returns the bit rshifted out (0 or 1). */
803 #define mpn_rsh1add_n __MPN(rsh1add_n)
804 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
806 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
807 and returns the bit rshifted out (0 or 1). If there's a borrow from the
808 subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
809 complement negative. */
810 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
811 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
813 #define mpn_lshiftc __MPN(lshiftc)
814 __GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
816 #define mpn_addsub_n __MPN(addsub_n)
817 __GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
819 #define mpn_addsub_nc __MPN(addsub_nc)
820 __GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
822 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
823 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
825 #define mpn_divrem_1c __MPN(divrem_1c)
826 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
828 #define mpn_dump __MPN(dump)
829 __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
831 #define mpn_fib2_ui __MPN(fib2_ui)
832 mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
834 /* Remap names of internal mpn functions. */
835 #define __clz_tab __MPN(clz_tab)
836 #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
838 #define mpn_jacobi_base __MPN(jacobi_base)
839 int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
841 #define mpn_mod_1c __MPN(mod_1c)
842 __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
844 #define mpn_mul_1c __MPN(mul_1c)
845 __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
847 #define mpn_mul_2 __MPN(mul_2)
848 mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
850 #define mpn_mul_3 __MPN(mul_3)
851 __GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
853 #define mpn_mul_4 __MPN(mul_4)
854 __GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
856 #ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
857 #define mpn_mul_basecase __MPN(mul_basecase)
858 __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
861 #define mpn_mullow_n __MPN(mullow_n)
862 __GMP_DECLSPEC void mpn_mullow_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
864 #define mpn_mullow_basecase __MPN(mullow_basecase)
865 __GMP_DECLSPEC void mpn_mullow_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
867 #define mpn_sqr_n __MPN(sqr) /* compatibility */
869 #ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
870 #define mpn_sqr_basecase __MPN(sqr_basecase)
871 __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
874 #define mpn_submul_1c __MPN(submul_1c)
875 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
877 #define mpn_invert_2exp __MPN(invert_2exp)
878 __GMP_DECLSPEC void mpn_invert_2exp __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
880 #define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
883 #define mpn_redc_2 __MPN(redc_2)
884 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
887 #define mpn_mod_1s_1p_cps __MPN(mod_1s_1p_cps)
888 __GMP_DECLSPEC void mpn_mod_1s_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
889 #define mpn_mod_1s_1p __MPN(mod_1s_1p)
890 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]));
892 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
893 __GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
894 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
895 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]));
897 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
898 __GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
899 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
900 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]));
902 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
903 __GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
904 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
905 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]));
908 typedef __gmp_randstate_struct *gmp_randstate_ptr;
909 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
911 /* Pseudo-random number generator function pointers structure. */
typedef struct {
  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
} gmp_randfnptr_t;
919 /* Macro to obtain a void pointer to the function pointers structure. */
920 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
922 /* Macro to obtain a pointer to the generator's state.
When used as an lvalue the rvalue needs to be cast to mp_ptr. */
924 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
926 /* Write a given number of random bits to rp. */
#define _gmp_rand(rp, state, bits) \
  do { \
    gmp_randstate_ptr __rstate = (state); \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
      (__rstate, rp, bits); \
  } while (0)
934 __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
937 /* __gmp_rands is the global state for the old-style random functions, and
938 is also used in the test programs (hence the __GMP_DECLSPEC).
940 There's no seeding here, so mpz_random etc will generate the same
941 sequence every time. This is not unlike the C library random functions
942 if you don't seed them, so perhaps it's acceptable. Digging up a seed
943 from /dev/random or the like would work on many systems, but might
944 encourage a false confidence, since it'd be pretty much impossible to do
945 something that would work reliably everywhere. In any case the new style
946 functions are recommended to applications which care about randomness, so
947 the old functions aren't too important. */
949 __GMP_DECLSPEC extern char __gmp_rands_initialized;
950 __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
#define RANDS \
  ((__gmp_rands_initialized ? 0 \
    : (__gmp_rands_initialized = 1, \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
   __gmp_rands)
958 /* this is used by the test programs, to free memory */
#define RANDS_CLEAR() \
  do { \
    if (__gmp_rands_initialized) \
      { \
        __gmp_rands_initialized = 0; \
        gmp_randclear (__gmp_rands); \
      } \
  } while (0)
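/* For instance, internal or test code wanting 100 random bits in a limb
   block xp (sized appropriately, as a sketch) can do

     _gmp_rand (xp, RANDS, 100);

   and later RANDS_CLEAR () to free the state; RANDS initializes
   __gmp_rands on first use. */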
969 /* FIXME: Make these itch functions less conservative. Also consider making
them dependent on just 'an', and compute the allocation directly from 'an'. */
972 static inline mp_size_t
973 mpn_toom22_mul_itch (mp_size_t an, mp_size_t bn)
975 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
979 static inline mp_size_t
mpn_toom33_mul_itch (mp_size_t an, mp_size_t bn)
{
  /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
     mpn_toom_interpolate_5pts only needs scratch otherwise. */
  mp_size_t n = (an + 2) / (size_t) 3;
  return 6 * n + GMP_NUMB_BITS;
}
988 static inline mp_size_t
mpn_toom44_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = (an + 3) >> 2;
  return 12 * n + GMP_NUMB_BITS;
}
995 static inline mp_size_t
996 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
998 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
1002 static inline mp_size_t
1003 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
1005 /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
1006 mpn_toom_interpolate_5pts only needs scratch otherwise. */
1007 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
1011 static inline mp_size_t
1012 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
1014 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
1018 static inline mp_size_t
1019 mpn_toom2_sqr_itch (mp_size_t an)
1021 mp_size_t n = 1 + ((an - 1) >> 1);
1025 static inline mp_size_t
mpn_toom3_sqr_itch (mp_size_t an)
{
  /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
     mpn_toom_interpolate_5pts only needs scratch otherwise. */
  mp_size_t n = (an + 2) / (size_t) 3;
  return 6 * n + GMP_NUMB_BITS;
}
1034 static inline mp_size_t
mpn_toom4_sqr_itch (mp_size_t an)
{
  mp_size_t n = (an + 3) >> 2;
  return 12 * n + GMP_NUMB_BITS;
}
1042 /* kara uses n+1 limbs of temporary space and then recurses with the balance,
1043 so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ... This can be solved to
1044 2n + o(n). Since n is very limited, o(n) in practice could be around 15.
1045 For now, assume n is arbitrarily large. */
1046 #define MPN_KARA_MUL_N_TSIZE(n) (2*(n) + 2*GMP_LIMB_BITS)
1047 #define MPN_KARA_SQR_N_TSIZE(n) (2*(n) + 2*GMP_LIMB_BITS)
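/* The sum behind the 2n + o(n) claim: (n+1) + (n/2+1) + (n/4+1) + ...
   <= 2n + log2(n) + O(1), so the 2*GMP_LIMB_BITS term comfortably covers
   the o(n) part for any feasible n. */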
1049 /* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
1050 unavailable, but just 2n + o(n) if mpn_sublsh1_n is available. It is hard
1051 to pin down the value of o(n), since it is a complex function of
1052 MUL_TOOM3_THRESHOLD and n. Normally toom3 is used between kara and fft; in
1053 that case o(n) will be really limited. If toom3 is used for arbitrarily
1054 large operands, o(n) will be larger. These definitions handle operands of
1055 up to 8956264246117233 limbs. A single multiplication using toom3 on the
1056 fastest hardware currently (2008) would need 10 million years, which
1057 suggests that these limits are acceptable. */
#if WANT_FFT
#if HAVE_NATIVE_mpn_sublsh1_n
#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 63)
#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 63)
#else
#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 2*(n/3) + 63)
#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 2*(n/3) + 63)
#endif
#else /* WANT_FFT */
#if HAVE_NATIVE_mpn_sublsh1_n
#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 255)
#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 255)
#else
#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 2*(n/3) + 255)
#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 2*(n/3) + 255)
#endif
#define MPN_TOOM44_MAX_N 285405
#endif /* WANT_FFT */
1077 /* need 2 so that n2>=1 */
1078 #define MPN_KARA_MUL_N_MINSIZE 2
1079 #define MPN_KARA_SQR_N_MINSIZE 2
1081 /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
1082 #define MPN_TOOM3_MUL_N_MINSIZE 17
1083 #define MPN_TOOM3_SQR_N_MINSIZE 17
1085 #define MPN_TOOM44_MUL_N_MINSIZE 30 /* ??? */
1086 #define MPN_TOOM4_SQR_N_MINSIZE 30 /* ??? */
1088 #define mpn_sqr_diagonal __MPN(sqr_diagonal)
1089 void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1091 #define mpn_kara_mul_n __MPN(kara_mul_n)
1092 void mpn_kara_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
1094 #define mpn_kara_sqr_n __MPN(kara_sqr_n)
1095 void mpn_kara_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1097 #define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1098 void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t, mp_ptr));
1100 enum toom4_flags { toom4_w1_neg = 1, toom4_w3_neg = 2 }; /* FIXME */
1101 #define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1102 void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom4_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
1104 #define mpn_toom3_mul_n __MPN(toom3_mul_n)
void mpn_toom3_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
1107 #define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
1108 void mpn_toom3_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1110 #define mpn_toom22_mul __MPN(toom22_mul)
1111 void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1113 #define mpn_toom2_sqr __MPN(toom2_sqr)
1114 void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1116 #define mpn_toom33_mul __MPN(toom33_mul)
1117 void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1119 #define mpn_toom3_sqr __MPN(toom3_sqr)
1120 void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1122 #define mpn_toom44_mul __MPN(toom44_mul)
1123 void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1125 #define mpn_toom32_mul __MPN(toom32_mul)
1126 void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1128 #define mpn_toom42_mul __MPN(toom42_mul)
1129 void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1131 #define mpn_toom53_mul __MPN(toom53_mul)
1132 void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1134 #define mpn_toom62_mul __MPN(toom62_mul)
1135 void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1137 #define mpn_toom4_sqr __MPN(toom4_sqr)
1138 void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1140 #define mpn_fft_best_k __MPN(fft_best_k)
1141 int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1143 #define mpn_mul_fft __MPN(mul_fft)
1144 mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
1146 #define mpn_mul_fft_full __MPN(mul_fft_full)
1147 void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1149 #define mpn_fft_next_size __MPN(fft_next_size)
1150 mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1152 #define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
1153 mp_limb_t mpn_sb_divrem_mn __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1155 #define mpn_dc_divrem_n __MPN(dc_divrem_n)
1156 mp_limb_t mpn_dc_divrem_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
1158 #define mpn_sb_div_qr __MPN(sb_div_qr)
1159 mp_limb_t mpn_sb_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1160 #define mpn_sb_div_q __MPN(sb_div_q)
1161 mp_limb_t mpn_sb_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1162 #define mpn_sb_divappr_q __MPN(sb_divappr_q)
1163 mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1164 #define mpn_dc_div_qr __MPN(dc_div_qr)
1165 mp_limb_t mpn_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1166 #define mpn_dc_div_qr_n __MPN(dc_div_qr_n)
1167 mp_limb_t mpn_dc_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1168 #define mpn_dc_div_q __MPN(dc_div_q)
1169 mp_limb_t mpn_dc_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1170 #define mpn_preinv_dc_div_qr __MPN(preinv_dc_div_qr)
1171 mp_limb_t mpn_preinv_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1172 #define mpn_dc_divappr_q __MPN(dc_divappr_q)
1173 mp_limb_t mpn_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1174 #define mpn_dc_divappr_q_n __MPN(dc_divappr_q_n)
1175 mp_limb_t mpn_dc_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1176 #define mpn_preinv_dc_divappr_q __MPN(preinv_dc_divappr_q)
1177 mp_limb_t mpn_preinv_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1178 #define mpn_mu_div_qr __MPN(mu_div_qr)
1179 mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1180 #define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1181 mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1182 #define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
1183 mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1184 #define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1185 void mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1186 #define mpn_mu_divappr_q __MPN(mu_divappr_q)
1187 mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1188 #define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1189 mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1190 #define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
1191 mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1192 #define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
1193 void mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1194 #define mpn_mu_div_q __MPN(mu_div_q)
1195 mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1196 #define mpn_invert __MPN(invert)
1197 void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1198 #define mpn_invert_itch __MPN(invert_itch)
1199 mp_size_t mpn_invert_itch __GMP_PROTO ((mp_size_t));
1201 #define mpn_binvert __MPN(binvert)
1202 void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1203 #define mpn_binvert_itch __MPN(binvert_itch)
1204 mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
1205 #define mpn_sb_bdiv_qr __MPN(sb_bdiv_qr)
1206 mp_limb_t mpn_sb_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1207 #define mpn_sb_bdiv_q __MPN(sb_bdiv_q)
1208 void mpn_sb_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1209 #define mpn_dc_bdiv_qr __MPN(dc_bdiv_qr)
1210 mp_limb_t mpn_dc_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1211 #define mpn_dc_bdiv_qr_n_itch __MPN(dc_bdiv_qr_n_itch)
1212 mp_size_t mpn_dc_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
1213 #define mpn_dc_bdiv_qr_n __MPN(dc_bdiv_qr_n)
1214 mp_limb_t mpn_dc_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1215 #define mpn_dc_bdiv_q __MPN(dc_bdiv_q)
1216 void mpn_dc_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1217 #define mpn_dc_bdiv_q_n_itch __MPN(dc_bdiv_q_n_itch)
1218 mp_size_t mpn_dc_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
1219 #define mpn_dc_bdiv_q_n __MPN(dc_bdiv_q_n)
1220 void mpn_dc_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1221 #define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1222 void mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1223 #define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1224 mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1225 #define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1226 void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1227 #define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1228 mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1230 #define mpn_divexact __MPN(divexact)
1231 void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1232 #define mpn_divexact_itch __MPN(divexact_itch)
1233 mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1236 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1237 mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
1238 #define mpn_bdiv_dbm1(dst, src, size, divisor) \
1239 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1241 #define mpn_powm __MPN(powm)
1242 void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1243 #define mpn_powlo __MPN(powlo)
1244 void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1246 #define mpn_powm_sec __MPN(powm_sec)
1247 void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1248 #define mpn_subcnd_n __MPN(subcnd_n)
1249 mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
1250 #define mpn_tabselect __MPN(tabselect)
1251 void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
1253 #ifndef DIVEXACT_BY3_METHOD
1254 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1255 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */
#else
#define DIVEXACT_BY3_METHOD 1
#endif
#endif
1261 #if DIVEXACT_BY3_METHOD == 0
1262 #undef mpn_divexact_by3
1263 #define mpn_divexact_by3(dst,src,size) \
1264 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1265 /* override mpn_divexact_by3c defined in gmp.h */
1267 #undef mpn_divexact_by3c
1268 #define mpn_divexact_by3c(dst,src,size,cy) \
1269 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
1273 #if GMP_NUMB_BITS % 4 == 0
1274 #define mpn_divexact_by5(dst,src,size) \
1275 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1278 #if GMP_NUMB_BITS % 6 == 0
1279 #define mpn_divexact_by7(dst,src,size) \
1280 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1283 #if GMP_NUMB_BITS % 6 == 0
1284 #define mpn_divexact_by9(dst,src,size) \
1285 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1288 #if GMP_NUMB_BITS % 10 == 0
1289 #define mpn_divexact_by11(dst,src,size) \
1290 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1293 #if GMP_NUMB_BITS % 12 == 0
1294 #define mpn_divexact_by13(dst,src,size) \
1295 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1298 #if GMP_NUMB_BITS % 4 == 0
1299 #define mpn_divexact_by15(dst,src,size) \
1300 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1303 #define mpz_divexact_gcd __gmpz_divexact_gcd
1304 void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
1306 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1307 #ifdef _GMP_H_HAVE_FILE
1308 size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
1311 #define mpn_divisible_p __MPN(divisible_p)
1312 int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
1314 #define mpn_rootrem __MPN(rootrem)
1315 mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n) \
  do { \
    int __i; /* Faster on some Crays with plain int */ \
    _Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < (n); __i++) \
      (dst)[__i] = (src)[__i]; \
  } while (0)
#endif
1328 /* used by test programs, hence __GMP_DECLSPEC */
1329 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
1330 #define mpn_copyi __MPN(copyi)
1331 __GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1334 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size) \
  do { \
    ASSERT ((size) >= 0); \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
    mpn_copyi (dst, src, size); \
  } while (0)
#endif
1343 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
1344 #if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n) \
  do { \
    ASSERT ((n) >= 0); \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
    { \
      mp_size_t __n = (n); \
      mp_ptr __dst = (dst); \
      mp_srcptr __src = (src); \
      while (__n-- != 0) \
        *__dst++ = *__src++; \
    } \
  } while (0)
#endif
#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n) \
  do { \
    int __i; /* Faster on some Crays with plain int */ \
    _Pragma ("_CRI ivdep"); \
    for (__i = (n) - 1; __i >= 0; __i--) \
      (dst)[__i] = (src)[__i]; \
  } while (0)
#endif
1381 /* used by test programs, hence __GMP_DECLSPEC */
1382 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
1383 #define mpn_copyd __MPN(copyd)
1384 __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1387 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size) \
  do { \
    ASSERT ((size) >= 0); \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
    mpn_copyd (dst, src, size); \
  } while (0)
#endif
1396 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1397 #if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n) \
  do { \
    ASSERT ((n) >= 0); \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
    { \
      mp_size_t __n = (n) - 1; \
      mp_ptr __dst = (dst) + __n; \
      mp_srcptr __src = (src) + __n; \
      for (; __n >= 0; __n--) \
        *__dst-- = *__src--; \
    } \
  } while (0)
#endif
#ifndef MPN_COPY
#define MPN_COPY(d,s,n) \
  do { \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
    MPN_COPY_INCR (d, s, n); \
  } while (0)
#endif
1433 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
#define MPN_REVERSE(dst, src, size) \
  do { \
    mp_ptr __dst = (dst); \
    mp_size_t __size = (size); \
    mp_srcptr __src = (src) + __size - 1; \
    mp_size_t __i; \
    ASSERT ((size) >= 0); \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
    CRAY_Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < __size; __i++) \
      *__dst++ = *__src--; \
  } while (0)
1452 /* Zero n limbs at dst.
1454 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
ppc630 for instance this is optimal since it can sustain only 1 store per
cycle.
1458 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1459 "for" loop in the generic code below can become stu/bdnz. The do/while
1460 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1461 applies here as to __GMPN_COPY_INCR in gmp.h.
xlc 3.1 already generates stu/bdnz from the generic C, and does so from
this loop too.
1466 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1467 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1468 trouble than it's worth to do the same, though perhaps a call to memset
1469 would be good when on a GNU system. */
1471 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1472 #define MPN_ZERO(dst, n) \
1474 ASSERT ((n) >= 0); \
1477 mp_ptr __dst = (dst) - 1; \
1478 mp_size_t __n = (n); \
1487 #define MPN_ZERO(dst, n) \
1489 ASSERT ((n) >= 0); \
1492 mp_ptr __dst = (dst); \
1493 mp_size_t __n = (n); \
1502 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1503 start up and would need to strip a lot of zeros before it'd be faster
1504 than a simple cmpl loop. Here are some times in cycles for
std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
low zeros). */
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
  do { \
    while ((NLIMBS) > 0 && (DST)[(NLIMBS) - 1] == 0) \
      (NLIMBS)--; \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
  do { \
    ASSERT ((NLIMBS) >= 1); \
    while ((DST)[(NLIMBS) - 1] == 0) \
      (NLIMBS)--; \
  } while (0)
#endif
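/* E.g. {5,0,0} with NLIMBS == 3 normalizes to NLIMBS == 1; under
   MPN_NORMALIZE a block of all zeros goes all the way to NLIMBS == 0. */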
1538 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1539 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1540 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1541 somewhere a non-zero limb. */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
  do { \
    ASSERT ((size) >= 1); \
    ASSERT ((low) == (ptr)[0]); \
    while ((low) == 0) \
      { \
        (size)--; \
        ASSERT ((size) >= 1); \
        (low) = *++(ptr); \
      } \
  } while (0)
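/* E.g. ptr pointing at {0,0,5} with size == 3 and low == 0 leaves ptr
   advanced by two limbs, size == 1 and low == 5. */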
/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
   temporary variable; it will be automatically cleared out at function
   return. We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments. */
#define MPZ_TMP_INIT(X, NLIMBS) \
  do { \
    mpz_ptr __x = (X); \
    ASSERT ((NLIMBS) >= 1); \
    __x->_mp_alloc = (NLIMBS); \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
  } while (0)
1568 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
                          ? (mp_ptr) _mpz_realloc(z,n) \
                          : PTR(z))
1573 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.
1579 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1580 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
1581 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1583 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1584 without good floating point. There's +2 for rounding up, and a further
1585 +2 since at the last step x limbs are doubled into a 2x+1 limb region
1586 whereas the actual F[2k] value might be only 2x-1 limbs.
1588 Note that a division is done first, since on a 32-bit system it's at
1589 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
1590 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1591 whatever a multiply of two 190Mbyte numbers takes.)
1593 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1594 worked into the multiplier. */
1596 #define MPN_FIB2_SIZE(n) \
1597 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
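/* E.g. MPN_FIB2_SIZE(1000) with 32-bit limbs is 1000/32*23/32 + 4
   = 713/32 + 4 = 26 limbs, comfortably above the roughly 22 limbs
   (about 694 bits) actually taken by F[1000]. */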
1600 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
1601 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1603 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1604 F[n] + 2*F[n-1] fits in a limb. */
1606 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1607 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
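/* The +1 offsets the n==-1 entry, so __gmp_fib_table[0] is F[-1] == 1,
   [1] is F[0] == 0 and [2] is F[1] == 1. */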
1610 /* For a threshold between algorithms A and B, size>=thresh is where B
1611 should be used. Special value MP_SIZE_T_MAX means only ever use A, or
1612 value 0 means only ever use B. The tests for these special values will
1613 be compile-time constants, so the compiler should be able to eliminate
1614 the code for the unwanted algorithm. */
#define ABOVE_THRESHOLD(size,thresh) \
  ((thresh) == 0 \
   || ((thresh) != MP_SIZE_T_MAX \
       && (size) >= (thresh)))
1620 #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
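/* A typical dispatch, as a sketch (wp, ap, bp, tp are hypothetical
   pointers; tp needs MPN_KARA_MUL_N_TSIZE(n) limbs of scratch, defined
   earlier in this file):

     if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
       mpn_mul_basecase (wp, ap, n, bp, n);
     else
       mpn_kara_mul_n (wp, ap, bp, n, tp);
*/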
1622 /* Usage: int use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
if (CACHED_BELOW_THRESHOLD (use_foo, FOO_THRESHOLD))

   When "use_foo" is a constant (thresh is 0 or MP_SIZE_T_MAX), gcc prior to
1627 version 3.3 doesn't optimize away a test "if (use_foo)" when within a
1628 loop. CACHED_BELOW_THRESHOLD helps it do so. */
1630 #define CACHED_ABOVE_THRESHOLD(cache, thresh) \
1631 ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX \
1632 ? ABOVE_THRESHOLD (0, thresh) \
1634 #define CACHED_BELOW_THRESHOLD(cache, thresh) \
1635 ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX \
1636 ? BELOW_THRESHOLD (0, thresh) \
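/* Usage sketch (illustrative, not GMP source; FOO_THRESHOLD is a
   hypothetical tuned constant): hoisting a threshold test out of a loop
   while still letting the compiler kill dead code for the special values.

     int  use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
     for (i = 0; i < n; i++)
       {
         if (CACHED_BELOW_THRESHOLD (use_foo, FOO_THRESHOLD))
           ... small-size code ...
         else
           ... large-size code ...
       }
*/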
/* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_KARATSUBA_THRESHOLD
#define MUL_KARATSUBA_THRESHOLD 32
#endif

/* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_TOOM3_THRESHOLD
#define MUL_TOOM3_THRESHOLD 128
#endif

#ifndef MUL_TOOM44_THRESHOLD
#define MUL_TOOM44_THRESHOLD 500
#endif

/* Source compatibility while source is in flux.  */
#define MUL_TOOM22_THRESHOLD MUL_KARATSUBA_THRESHOLD
#define MUL_TOOM33_THRESHOLD MUL_TOOM3_THRESHOLD
#define SQR_TOOM2_THRESHOLD SQR_KARATSUBA_THRESHOLD

/* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum for MUL_KARATSUBA_THRESHOLD.
   In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.
   In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a
   variable and a separate hard limit will have been defined.  Similarly for
   the TOOM3 and MULLOW thresholds below.  */
#ifndef MUL_KARATSUBA_THRESHOLD_LIMIT
#define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD
#endif
#ifndef MUL_TOOM3_THRESHOLD_LIMIT
#define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD
#endif
#ifndef MULLOW_BASECASE_THRESHOLD_LIMIT
#define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD
#endif

/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
   always.  (Note that we certainly always want it if there's a native
   assembler mpn_sqr_basecase.)

   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
   which mpn_sqr_basecase should be used, and that may be never.  */

#ifndef SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD 0
#endif

#ifndef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
#endif

#ifndef SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD 128
#endif

#ifndef SQR_TOOM4_THRESHOLD
#define SQR_TOOM4_THRESHOLD 500
#endif

/* See comments above about MUL_TOOM3_THRESHOLD_LIMIT.  */
#ifndef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
#endif

#ifndef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD 43
#endif

#ifndef DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD 208
#endif

#ifndef DC_DIV_Q_THRESHOLD
#define DC_DIV_Q_THRESHOLD 228
#endif

#ifndef DC_BDIV_QR_THRESHOLD
#define DC_BDIV_QR_THRESHOLD 52
#endif

#ifndef DC_BDIV_Q_THRESHOLD
#define DC_BDIV_Q_THRESHOLD 224
#endif

#ifndef DIVEXACT_JEB_THRESHOLD
#define DIVEXACT_JEB_THRESHOLD 25
#endif

#ifndef INV_NEWTON_THRESHOLD
#define INV_NEWTON_THRESHOLD 654
#endif

#ifndef BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD 807
#endif

#ifndef MU_DIVAPPR_Q_THRESHOLD
#define MU_DIVAPPR_Q_THRESHOLD 4000
#endif

#ifndef MU_DIV_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD 4000
#endif

#ifndef MU_BDIV_Q_THRESHOLD
#define MU_BDIV_Q_THRESHOLD 2000
#endif
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
#endif
#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
#endif
#ifndef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
#endif
/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                           \
  { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16
/* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= MUL_KARATSUBA_THRESHOLD

   Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
   for 3 as an average.  */

#ifndef DIV_DC_THRESHOLD
#define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
#endif

#ifndef GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD 18
#endif

#ifndef GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD 35
#endif

#ifndef SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD 750
#endif

#ifndef SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD 2000
#endif
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
  (   (char *) (xp) + (xsize) > (char *) (yp)   \
   && (char *) (yp) + (ysize) > (char *) (xp))

/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.  Return zero if they're partially overlapping. */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)   \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Return non-zero if dst,dsize and src,ssize are either identical or
   overlapping in a way suitable for an incrementing/decrementing algorithm.
   Return zero if they're partially overlapping in an unsuitable fashion. */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size)      \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size)      \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)
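/* Illustration (not GMP source): for a low-to-high copy in the style of
   mpn_copyi, writing dst[i] before reading src[i+1] is safe exactly when
   dst <= src, which is what MPN_SAME_OR_INCR_P accepts:

     ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));
     for (i = 0; i < n; i++)
       dst[i] = src[i];     -- ok for dst == src or dst below src
*/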
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R).  */
#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

void __gmp_assert_header __GMP_PROTO ((const char *, int));
__GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#define ASSERT_ALWAYS(expr)     \
  do {                          \
    if (UNLIKELY (! (expr)))    \
      ASSERT_FAIL (expr);       \
  } while (0)

#if WANT_ASSERT
#define ASSERT(expr)   ASSERT_ALWAYS (expr)
#else
#define ASSERT(expr)   do {} while (0)
#endif
/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   that it's zero.  In both cases if assertion checking is disabled the
   expression is still evaluated.  These macros are meant for use with
   routines like mpn_add_n() where the return value represents a carry or
   whatever that should or shouldn't occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif


/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   same as writing "#if WANT_ASSERT", but more compact.  */
#if WANT_ASSERT
#define ASSERT_CODE(expr)  expr
#else
#define ASSERT_CODE(expr)
#endif


/* Test that an mpq_t is in fully canonical form.  This can be used as
   protection on routines like mpq_equal which give wrong results on
   non-canonical inputs.  */
#if WANT_ASSERT
#define ASSERT_MPQ_CANONICAL(q)                         \
  do {                                                  \
    ASSERT (q->_mp_den._mp_size > 0);                   \
    if (q->_mp_num._mp_size == 0)                       \
      {                                                 \
        /* zero should be 0/1 */                        \
        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
      }                                                 \
    else                                                \
      {                                                 \
        /* no common factors */                         \
        mpz_t  __g;                                     \
        mpz_init (__g);                                 \
        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
        mpz_clear (__g);                                \
      }                                                 \
  } while (0)
#else
#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
#endif
/* Check that the nail parts are zero. */
#define ASSERT_ALWAYS_LIMB(limb)                \
  do {                                          \
    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
    ASSERT_ALWAYS (__nail == 0);                \
  } while (0)
#define ASSERT_ALWAYS_MPN(ptr, size)            \
  do {                                          \
    /* let whole loop go dead when no nails */  \
    if (GMP_NAIL_BITS != 0)                     \
      {                                         \
        mp_size_t  __i;                         \
        for (__i = 0; __i < (size); __i++)      \
          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
      }                                         \
  } while (0)
#if WANT_ASSERT
#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
#else
#define ASSERT_LIMB(limb)       do {} while (0)
#define ASSERT_MPN(ptr, size)   do {} while (0)
#endif


/* Assert that an mpn region {ptr,size} is zero, or non-zero.
   size==0 is allowed, and in that case {ptr,size} is considered to be
   zero.  */
#if WANT_ASSERT
#define ASSERT_MPN_ZERO_P(ptr,size)             \
  do {                                          \
    mp_size_t  __i;                             \
    ASSERT ((size) >= 0);                       \
    for (__i = 0; __i < (size); __i++)          \
      ASSERT ((ptr)[__i] == 0);                 \
  } while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)          \
  do {                                          \
    mp_size_t  __i;                             \
    int        __nonzero = 0;                   \
    ASSERT ((size) >= 0);                       \
    for (__i = 0; __i < (size); __i++)          \
      if ((ptr)[__i] != 0)                      \
        {                                       \
          __nonzero = 1;                        \
          break;                                \
        }                                       \
    ASSERT (__nonzero);                         \
  } while (0)
#else
#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
#endif
#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
    do                                                  \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
    while (--__n);                                      \
  } while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
    do                                                  \
      operation;                                        \
    while (--__n);                                      \
  } while (0)
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
#endif
/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
#if GMP_NAIL_BITS == 0
#define ADDC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __x = (x);               \
    mp_limb_t  __y = (y);               \
    mp_limb_t  __w = __x + __y;         \
    (w) = __w;                          \
    (cout) = __w < __x;                 \
  } while (0)
#else
#define ADDC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __w;                     \
    ASSERT_LIMB (x);                    \
    ASSERT_LIMB (y);                    \
    __w = (x) + (y);                    \
    (w) = __w & GMP_NUMB_MASK;          \
    (cout) = __w >> GMP_NUMB_BITS;      \
  } while (0)
#endif

/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
   subtraction. */
#if GMP_NAIL_BITS == 0
#define SUBC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __x = (x);               \
    mp_limb_t  __y = (y);               \
    mp_limb_t  __w = __x - __y;         \
    (w) = __w;                          \
    (cout) = __w > __x;                 \
  } while (0)
#else
#define SUBC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __w = (x) - (y);         \
    (w) = __w & GMP_NUMB_MASK;          \
    (cout) = __w >> (GMP_LIMB_BITS-1);  \
  } while (0)
#endif
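/* Usage sketch (illustrative, not GMP source; a0,a1,b0,b1 are hypothetical
   limb variables): a two-limb addition built from ADDC_LIMB, propagating
   the carry into the high limb.

     mp_limb_t  lo, hi, cy;
     ADDC_LIMB (cy, lo, a0, b0);      -- low limbs, cy is 0 or 1
     hi = a1 + b1 + cy;               -- assumes no carry out of the top
*/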
/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
   expecting no carry (or borrow) from that.

   The size parameter is only for the benefit of assertion checking.  In a
   normal build it's unused and the carry/borrow is just propagated as far
   as it needs to go.

   On random data, usually only one or two limbs of {ptr,size} get updated,
   so there's no need for any sophisticated looping, just something compact
   suffices.

   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
   declaring their operand sizes, then remove the former.  This is purely
   for the benefit of assertion checking.  */
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
  && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
/* Better flags handling than the generic C gives on i386, saving a few
   bytes of code and maybe a cycle or two.  */

#define MPN_IORD_U(ptr, incr, aors)                     \
  do {                                                  \
    mp_ptr  __ptr_dummy;                                \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        __asm__ __volatile__                            \
          ("\n" ASM_L(top) ":\n"                        \
           "\t" aors " $1, (%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top)                           \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr)                                 \
           : "memory");                                 \
      }                                                 \
    else                                                \
      {                                                 \
        __asm__ __volatile__                            \
          (   aors  " %2,(%0)\n"                        \
           "\tjnc " ASM_L(done) "\n"                    \
           ASM_L(top) ":\n"                             \
           "\t" aors " $1,4(%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top) "\n"                      \
           ASM_L(done) ":\n"                            \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr),                                \
             "ri" (incr)                                \
           : "memory");                                 \
      }                                                 \
  } while (0)

#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
#endif
#if GMP_NAIL_BITS == 0
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while (++(*(__p++)) == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p + (incr);                            \
        *__p = __x;                                     \
        if (__x < (incr))                               \
          while (++(*(++__p)) == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while ((*(__p++))-- == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p;                                     \
        *__p = __x - (incr);                            \
        if (__x < (incr))                               \
          while ((*(++__p))-- == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#endif
#if GMP_NAIL_BITS >= 1
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t  __x;                                     \
    mp_ptr     __p = (p);                               \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = (*__p + 1) & GMP_NUMB_MASK;           \
            *__p++ = __x;                               \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = (*__p + (incr));                          \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = (*__p + 1) & GMP_NUMB_MASK;       \
                *__p++ = __x;                           \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t  __x;                                     \
    mp_ptr     __p = (p);                               \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = *__p;                                 \
            *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p - (incr);                            \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = *__p;                             \
                *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#endif
#if WANT_ASSERT
#define MPN_INCR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
#endif

#if WANT_ASSERT
#define MPN_DECR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
#endif
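/* Usage sketch (illustrative, not GMP source; rp, rsize, n are
   hypothetical): propagating a carry limb returned by mpn_add_n into the
   high part of a destination, where the surrounding logic guarantees the
   carry cannot run off the end.

     cy = mpn_add_n (rp, up, vp, n);
     if (cy != 0)
       MPN_INCR_U (rp + n, rsize - n, cy);   -- expects no carry out
*/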
/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base. */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted. */
  mp_limb_t big_base_inverted;
};

#define   mp_bases __MPN(bases)
#define __mp_bases __MPN(bases)
__GMP_DECLSPEC extern const struct bases mp_bases[257];
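/* For illustration (values follow from the field definitions above): with
   a 32-bit mp_limb_t, mp_bases[10] has chars_per_limb == 9, big_base ==
   10**9 == 0x3B9ACA00, and chars_per_bit_exactly == log(2)/log(10) ==
   0.30103, so for example a 32-limb number needs at most about
   32*32*0.30103 + 1 == 309 decimal digits.  */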
/* For power of 2 bases this is exact.  For other bases the result is either
   exact or one too big.

   To be exact always it'd be necessary to examine all the limbs of the
   operand, since numbers like 100..000 and 99...999 generally differ only
   in the lowest limb.  It'd be possible to examine just a couple of high
   limbs to increase the probability of being exact, but that doesn't seem
   worth bothering with. */
#define MPN_SIZEINBASE(result, ptr, size, base)                         \
  do {                                                                  \
    int       __lb_base, __cnt;                                         \
    size_t    __totbits;                                                \
                                                                        \
    ASSERT ((size) >= 0);                                               \
    ASSERT ((base) >= 2);                                               \
    ASSERT ((base) < numberof (mp_bases));                              \
                                                                        \
    /* Special case for X == 0. */                                      \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X. */      \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
                                                                        \
        if (POW2_P (base))                                              \
          {                                                             \
            __lb_base = mp_bases[base].big_base;                        \
            (result) = (__totbits + __lb_base - 1) / __lb_base;         \
          }                                                             \
        else                                                            \
          (result) = (size_t)                                           \
            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
      }                                                                 \
  } while (0)

/* eliminate mp_bases lookups for base==16 */
#define MPN_SIZEINBASE_16(result, ptr, size)                            \
  do {                                                                  \
    int        __cnt;                                                   \
    mp_size_t  __totbits;                                               \
                                                                        \
    ASSERT ((size) >= 0);                                               \
                                                                        \
    /* Special case for X == 0. */                                      \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X. */      \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
        (result) = (__totbits + 4 - 1) / 4;                             \
      }                                                                 \
  } while (0)
/* bit count to limb count, rounding up */
#define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
   mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
   in both cases (LIMBS_PER_ULONG is also defined here.) */
#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */

#define LIMBS_PER_ULONG 1
#define MPN_SET_UI(zp, zn, u)   \
  (zp)[0] = (u);                \
  (zn) = ((zp)[0] != 0);
#define MPZ_FAKE_UI(z, zp, u)   \
  (zp)[0] = (u);                \
  PTR (z) = (zp);               \
  SIZ (z) = ((zp)[0] != 0);     \
  ASSERT_CODE (ALLOC (z) = 1);

#else /* need two limbs per ulong */

#define LIMBS_PER_ULONG 2
#define MPN_SET_UI(zp, zn, u)                          \
  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
#define MPZ_FAKE_UI(z, zp, u)                          \
  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
  PTR (z) = (zp);                                      \
  ASSERT_CODE (ALLOC (z) = 2);

#endif


#if HAVE_HOST_CPU_FAMILY_x86
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
   or 0 there into a limb 0xFF..FF or 0 respectively.

   On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
   but C99 doesn't guarantee signed right shifts are arithmetic, so we have
   a little compile-time test and a fallback to a "? :" form.  The latter is
   necessary for instance on Cray vector systems.

   Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
   to an arithmetic right shift anyway, but it's good to get the desired
   shift on past versions too (in particular since an important use of
   LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */

#define LIMB_HIGHBIT_TO_MASK(n)                                 \
  (((mp_limb_signed_t) -1 >> 1) < 0                             \
   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
   : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
/* Use a library function for invert_limb, if available. */
#define  mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define invert_limb(invxl,xl)           \
  do {                                  \
    (invxl) = mpn_invert_limb (xl);     \
  } while (0)
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                                   \
  do {                                                          \
    mp_limb_t dummy;                                            \
    ASSERT ((xl) != 0);                                         \
    udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);        \
  } while (0)
#endif
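/* For illustration: with B = 2^GMP_LIMB_BITS and xl normalized (high bit
   set), the udiv_qrnnd above computes
   invxl = floor ((~xl * B + B-1) / xl) = floor ((B^2 - 1) / xl) - B,
   i.e. the quotient bits beyond the implicit leading 1, which is the form
   of inverse the udiv_qrnnd_preinv macros below expect as di.  */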
#ifndef udiv_qrnnd_preinv
#define udiv_qrnnd_preinv udiv_qrnnd_preinv3
#endif

/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                         \
  do {                                                                  \
    mp_limb_t _q, _ql, _r;                                              \
    mp_limb_t _xh, _xl;                                                 \
    ASSERT ((d) != 0);                                                  \
    umul_ppmm (_q, _ql, (nh), (di));                                    \
    _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
    umul_ppmm (_xh, _xl, _q, (d));                                      \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
    if (_xh != 0)                                                       \
      {                                                                 \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
        _q += 1;                                                        \
        if (_xh != 0)                                                   \
          {                                                             \
            _r -= (d);                                                  \
            _q += 1;                                                    \
          }                                                             \
      }                                                                 \
    if (_r >= (d))                                                      \
      {                                                                 \
        _r -= (d);                                                      \
        _q += 1;                                                        \
      }                                                                 \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
/* Like udiv_qrnnd_preinv, but branch-free. */
#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                 \
  do {                                                          \
    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                    \
    mp_limb_t _xh, _xl;                                         \
    _n2 = (nh);                                                 \
    _n10 = (nl);                                                \
    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                       \
    _nadj = _n10 + (_nmask & (d));                              \
    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                     \
    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                \
    _q1 = ~_xh;                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                               \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                    \
    _xh -= (d);                 /* xh = 0 or -1 */              \
    (r) = _xl + ((d) & _xh);                                    \
    (q) = _xh - _q1;                                            \
  } while (0)

/* Like udiv_qrnnd_preinv2, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
  do {                                                          \
    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                    \
    mp_limb_t _xh, _xl;                                         \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup)))                 \
      + ((nl) >> 1 >> ((lgup) - 1));                            \
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                 \
    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                       \
    _nadj = _n10 + (_nmask & (dnorm));                          \
    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                     \
    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                \
    _q1 = ~_xh;                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                               \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                    \
    _xh -= (d);                                                 \
    (r) = _xl + ((d) & _xh);                                    \
    (q) = _xh - _q1;                                            \
  } while (0)
/* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.

   We write things strangely below, to help gcc.  A more straightforward
   version:

        _r = (nl) - _qh * (d);
        _t = _r + (d);
        if (_r >= _ql)
          {
            _qh--;
            _r = _t;
          }

   For one operation shorter critical path, one may want to use this form:

        _p = _qh * (d);
        _s = (nl) + (d);
        _r = (nl) - _p;
        _t = _s - _p;
        if (_r >= _ql)
          {
            _qh--;
            _r = _t;
          }
*/
#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)                 \
  do {                                                          \
    mp_limb_t _qh, _ql, _r;                                     \
    umul_ppmm (_qh, _ql, (nh), (di));                           \
    if (__builtin_constant_p (nl) && (nl) == 0)                 \
      _qh += (nh) + 1;                                          \
    else                                                        \
      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));          \
    _r = (nl) - _qh * (d);                                      \
    if (_r > _ql) /* both > and >= should be OK */              \
      {                                                         \
        _r += (d);                                              \
        _qh--;                                                  \
      }                                                         \
    if (UNLIKELY (_r >= (d)))                                   \
      {                                                         \
        _r -= (d);                                              \
        _qh++;                                                  \
      }                                                         \
    (r) = _r;                                                   \
    (q) = _qh;                                                  \
  } while (0)

/* Compute r = nh*B mod d, where di is the inverse of d. */
#define udiv_rnd_preinv(r, nh, d, di)                           \
  do {                                                          \
    mp_limb_t _qh, _ql, _r;                                     \
    umul_ppmm (_qh, _ql, (nh), (di));                           \
    _qh += (nh) + 1;                                            \
    _r = - _qh * (d);                                           \
    if (_r > _ql)                                               \
      _r += (d);                                                \
    (r) = _r;                                                   \
  } while (0)
#ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
#define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
#endif


/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
   the few CISC chips where preinv is not good have defines saying so.  */
#ifndef USE_PREINV_DIVREM_1
#define USE_PREINV_DIVREM_1 1
#endif
#ifndef USE_PREINV_MOD_1
#define USE_PREINV_MOD_1 1
#endif

#if USE_PREINV_DIVREM_1
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
#else
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_divrem_1 (qp, xsize, ap, size, d)
#endif

#if USE_PREINV_MOD_1
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_preinv_mod_1 (src, size, divisor, inverse)
#else
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_mod_1 (src, size, divisor)
#endif
#ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
#define   mpn_mod_34lsub1 __MPN(mod_34lsub1)
mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
#endif


/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
   plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
   modexact are faster at all sizes, so the defaults are 0.  Those CPUs
   where this is not right have a tuned threshold.  */
#ifndef DIVEXACT_1_THRESHOLD
#define DIVEXACT_1_THRESHOLD  0
#endif
#ifndef MODEXACT_1_ODD_THRESHOLD
#define MODEXACT_1_ODD_THRESHOLD  0
#endif

#ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
#define mpn_divexact_1 __MPN(divexact_1)
void    mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#endif

#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
  do {                                                                        \
    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
    else                                                                      \
      {                                                                       \
        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
        mpn_divexact_1 (dst, src, size, divisor);                             \
      }                                                                       \
  } while (0)
#ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
#define   mpn_modexact_1c_odd __MPN(modexact_1c_odd)
mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
#endif

#if HAVE_NATIVE_mpn_modexact_1_odd
#define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
#else
#define mpn_modexact_1_odd(src,size,divisor) \
  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
#endif

#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)     \
  (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)     \
   ? mpn_modexact_1_odd (src, size, divisor)            \
   : mpn_mod_1 (src, size, divisor))
/* binvert_limb() sets inv to the multiplicative inverse of n modulo
   2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
   n must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean "An algorithm for
   exact division" end of section 4 (reference in gmp.texi).

   Possible enhancement: Could use UHWtype until the last step, if half-size
   multiplies are faster (might help under _LONG_LONG_LIMB).

   Alternative: As noted in Granlund and Montgomery "Division by Invariant
   Integers using Multiplication" (reference in gmp.texi), n itself gives a
   3-bit inverse immediately, and could be used instead of a table lookup.
   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
#define binvert_limb_table  __gmp_binvert_limb_table
__GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];

#define binvert_limb(inv,n)                                             \
  do {                                                                  \
    mp_limb_t  __n = (n);                                               \
    mp_limb_t  __inv;                                                   \
    ASSERT ((__n & 1) == 1);                                            \
                                                                        \
    __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */                \
    if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;   \
    if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;   \
    if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;   \
                                                                        \
    if (GMP_NUMB_BITS > 64)                                             \
      {                                                                 \
        int  __invbits = 64;                                            \
        do {                                                            \
          __inv = 2 * __inv - __inv * __inv * __n;                      \
          __invbits *= 2;                                               \
        } while (__invbits < GMP_NUMB_BITS);                            \
      }                                                                 \
                                                                        \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                        \
    (inv) = __inv & GMP_NUMB_MASK;                                      \
  } while (0)
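/* Worked example of one Newton step (illustrative): for n = 3 the table
   gives the 8-bit inverse 0xAB (3*0xAB = 0x201 == 1 mod 2^8).  Then
   2*0xAB - 0xAB*0xAB*3 = -87381 == 0xAAAB mod 2^16, and indeed
   3*0xAAAB = 0x20001 == 1 mod 2^16, so each step doubles the number of
   valid low bits.  */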
#define modlimb_invert binvert_limb  /* backward compatibility */

/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
   Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
   GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
   we need to start from GMP_NUMB_MAX>>1. */
#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
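/* Check (illustrative): for 32 bits, 3 * 0xAAAAAAAB = 0x200000001, which
   is 1 mod 2^32, as required.  */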
/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
   These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
#define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
#define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.

   It's not clear whether this is the best way to do this calculation.
   Anything congruent to -a would be fine for the one limb congruence
   tests.  */

#define NEG_MOD(r, a, d)                                        \
  do {                                                          \
    ASSERT ((d) != 0);                                          \
    ASSERT_LIMB (a);                                            \
    ASSERT_LIMB (d);                                            \
                                                                \
    if ((a) <= (d))                                             \
      {                                                         \
        /* small a is reasonably likely */                      \
        (r) = (d) - (a);                                        \
      }                                                         \
    else                                                        \
      {                                                         \
        unsigned   __twos;                                      \
        mp_limb_t  __dnorm;                                     \
        count_leading_zeros (__twos, d);                        \
        __twos -= GMP_NAIL_BITS;                                \
        __dnorm = (d) << __twos;                                \
        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);     \
      }                                                         \
                                                                \
    ASSERT_LIMB (r);                                            \
  } while (0)

/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
   to 0 if there's an even number.  "n" should be an unsigned long and "p"
   an int.  */

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
#define ULONG_PARITY(p, n)                              \
  do {                                                  \
    int __p;                                            \
    __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));    \
    (p) = __p & 1;                                      \
  } while (0)
#endif

/* Cray intrinsic _popcnt. */
#ifdef _CRAY
#define ULONG_PARITY(p, n)      \
  do {                          \
    (p) = _popcnt (n) & 1;      \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)  \
    && ! defined (NO_ASM) && defined (__ia64)
/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
   to a 64 bit unsigned long long for popcnt */
#define ULONG_PARITY(p, n)                                      \
  do {                                                          \
    unsigned long long  __n = (unsigned long) (n);              \
    int  __p;                                                   \
    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));        \
    (p) = __p & 1;                                              \
  } while (0)
#endif
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)  \
    && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
#if __GMP_GNUC_PREREQ (3,1)
#define __GMP_qm "=Qm"
#define __GMP_q "=Q"
#else
#define __GMP_qm "=qm"
#define __GMP_q "=q"
#endif
#define ULONG_PARITY(p, n)                      \
  do {                                          \
    char           __p;                         \
    unsigned long  __n = (n);                   \
    __n ^= (__n >> 16);                         \
    __asm__ ("xorb %h1, %b1\n\t"                \
             "setpo %0"                         \
             : __GMP_qm (__p), __GMP_q (__n)    \
             : "1" (__n));                      \
    (p) = __p;                                  \
  } while (0)
#endif

#if ! defined (ULONG_PARITY)
#define ULONG_PARITY(p, n)                      \
  do {                                          \
    unsigned long  __n = (n);                   \
    int  __p = 0;                               \
    do                                          \
      {                                         \
        __p ^= 0x96696996L >> (__n & 0x1F);     \
        __n >>= 5;                              \
      }                                         \
    while (__n != 0);                           \
                                                \
    (p) = __p & 1;                              \
  } while (0)
#endif
/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
   version 3.1 at least) doesn't seem to know how to generate rlwimi for
   anything other than bit-fields, so use "asm".  */
#if defined (__GNUC__) && ! defined (NO_ASM)    \
  && HAVE_HOST_CPU_FAMILY_powerpc && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)                                            \
  do {                                                                  \
    mp_limb_t  __bswapl_src = (src);                                    \
    mp_limb_t  __tmp1 = __bswapl_src >> 24;             /* low byte */  \
    mp_limb_t  __tmp2 = __bswapl_src << 24;             /* high byte */ \
    __asm__ ("rlwimi %0, %2, 24, 16, 23"                /* 2nd low */   \
         : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));           \
    __asm__ ("rlwimi %0, %2,  8,  8, 15"                /* 3rd high */  \
         : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));           \
    (dst) = __tmp1 | __tmp2;                            /* whole */     \
  } while (0)
#endif
/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   kernel with xchgb instead of rorw), but this is not done here, because
   i386 means generic x86 and mixing word and dword operations will cause
   partial register stalls on P6 chips.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
  && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)                            \
  do {                                                  \
    __asm__ ("bswap %0" : "=r" (dst) : "0" (src));      \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && defined (__amd64__) && BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)                            \
  do {                                                  \
    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));     \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)  \
    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define BSWAP_LIMB(dst, src)                                    \
  do {                                                          \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src));    \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && HAVE_HOST_CPU_FAMILY_m68k && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    mp_limb_t  __bswapl_src = (src);    \
    __asm__ ("ror%.w %#8, %0\n\t"       \
             "swap   %0\n\t"            \
             "ror%.w %#8, %0"           \
             : "=d" (dst)               \
             : "0" (__bswapl_src));     \
  } while (0)
#endif
#if ! defined (BSWAP_LIMB)
#if BITS_PER_MP_LIMB == 8
#define BSWAP_LIMB(dst, src)            \
  do { (dst) = (src); } while (0)
#endif
#if BITS_PER_MP_LIMB == 16
#define BSWAP_LIMB(dst, src)                    \
  do {                                          \
    (dst) = ((src) << 8) + ((src) >> 8);        \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 24)                     \
      + (((src) & 0xFF00) << 8)         \
      + (((src) >> 8) & 0xFF00)         \
      + ((src) >> 24);                  \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 56)                     \
      + (((src) & 0xFF00) << 40)        \
      + (((src) & 0xFF0000) << 24)      \
      + (((src) & 0xFF000000) << 8)     \
      + (((src) >> 8) & 0xFF000000)     \
      + (((src) >> 24) & 0xFF0000)      \
      + (((src) >> 40) & 0xFF00)        \
      + ((src) >> 56);                  \
  } while (0)
#endif
#endif

#if ! defined (BSWAP_LIMB)
#define BSWAP_LIMB(dst, src)                            \
  do {                                                  \
    mp_limb_t  __bswapl_src = (src);                    \
    mp_limb_t  __dst = 0;                               \
    int        __i;                                     \
    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
      {                                                 \
        __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
        __bswapl_src >>= 8;                             \
      }                                                 \
    (dst) = __dst;                                      \
  } while (0)
#endif
/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
   those we know are fast.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src)     \
  do {                                  \
    mp_srcptr  __blf_src = (src);       \
    mp_limb_t  __limb;                  \
    __asm__ ("lwbrx %0, 0, %1"          \
             : "=r" (__limb)            \
             : "r" (__blf_src),         \
               "m" (*__blf_src));       \
    (limb) = __limb;                    \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
#endif


/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   know are fast.  FIXME: Is this necessary?  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb)     \
  do {                                  \
    mp_ptr     __dst = (dst);           \
    mp_limb_t  __limb = (limb);         \
    __asm__ ("stwbrx %1, 0, %2"         \
             : "=m" (*__dst)            \
             : "r" (__limb),            \
               "r" (__dst));            \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
#endif
/* Byte swap limbs from {src,size} and store at {dst,size}. */
#define MPN_BSWAP(dst, src, size)                       \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_srcptr  __src = (src);                           \
    mp_size_t  __size = (size);                         \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src++;                                        \
      }                                                 \
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at
   {dst,size}. */
#define MPN_BSWAP_REVERSE(dst, src, size)               \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)
/* No processor claiming to be SPARC v9 compliant seems to
   implement the POPC instruction.  Disable pattern for now.  */
#if 0
#if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input)                                        \
  do {                                                                  \
    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));              \
  } while (0)
#endif
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
#define popc_limb(result, input)                                        \
  do {                                                                  \
    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));             \
  } while (0)
#endif

/* Cray intrinsic. */
#ifdef _CRAY
#define popc_limb(result, input)        \
  do {                                  \
    (result) = _popcnt (input);         \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)  \
    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define popc_limb(result, input)                                        \
  do {                                                                  \
    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));           \
  } while (0)
#endif
/* Cool population count of an mp_limb_t.
   You have to figure out how this works, we won't tell you!

   The constants could also be expressed as:
     0x55... = [2^N / 3]  = [(2^N-1)/3]
     0x33... = [2^N / 5]  = [(2^N-1)/5]
     0x0f... = [2^N / 17] = [(2^N-1)/17]
   (N is GMP_LIMB_BITS, [] denotes truncation.) */
#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
#define popc_limb(result, input)                                        \
  do {                                                                  \
    mp_limb_t  __x = (input);                                           \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
    (result) = __x & 0xff;                                              \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
#define popc_limb(result, input)                                        \
  do {                                                                  \
    mp_limb_t  __x = (input);                                           \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
    __x = ((__x >> 8) + __x);                                           \
    (result) = __x & 0xff;                                              \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
#define popc_limb(result, input)                                        \
  do {                                                                  \
    mp_limb_t  __x = (input);                                           \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
    __x = ((__x >> 8) + __x);                                           \
    __x = ((__x >> 16) + __x);                                          \
    (result) = __x & 0xff;                                              \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
#define popc_limb(result, input)                                        \
  do {                                                                  \
    mp_limb_t  __x = (input);                                           \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
    __x = ((__x >> 8) + __x);                                           \
    __x = ((__x >> 16) + __x);                                          \
    __x = ((__x >> 32) + __x);                                          \
    (result) = __x & 0xff;                                              \
  } while (0)
#endif
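/* How the above works (a sketch, spelling out what the source keeps coy
   about): each step adds adjacent bit-field counts in parallel.
   E.g. for GMP_LIMB_BITS == 8 and __x = 0xDA = 11 01 10 10:

     __x -= (__x >> 1) & 0x55;            -- 0x95 = pair counts 2,1,1,1
     __x = ((__x >> 2) & 0x33)
           + (__x & 0x33);                -- 0x32 = nibble counts 3,2
     __x = ((__x >> 4) + __x) & 0x0f;     -- 0x05 = total of 5 bits set
*/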
/* Define stuff for longlong.h.  */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
/* Define ieee_double_extract and _GMP_IEEE_FLOATS.

   Bit field packing is "implementation defined" according to C99, which
   leaves us at the compiler's mercy here.  For some systems packing is
   defined in the ABI (eg. x86).  In any case so far it seems universal that
   little endian systems pack from low to high, and big endian from high to
   low within the given type.

   Within the fields we rely on the integer endianness being the same as the
   float endianness, this is true everywhere we know of and it'd be a fairly
   strange system that did anything else.  */
#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t sig:1;
      gmp_uint_least32_t manl:32;
    } s;
  double d;
};
#endif

#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t manl:32;
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t sig:1;
    } s;
  double d;
};
#endif

#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t sig:1;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t manl:32;
    } s;
  double d;
};
#endif


/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
/* Maximum number of limbs it will take to store any `double'.
   We assume doubles have 53 mantissa bits.  */
#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
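/* For illustration: with GMP_NUMB_BITS == 32 this gives (53+30)/32 + 1 ==
   2 + 1 == 3 limbs; roughly the ceiling of 53/32, plus one extra limb
   because the 53 mantissa bits generally straddle limb boundaries after
   the exponent shift.  */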
int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));

#define mpn_get_d __gmpn_get_d
double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
   a_inf if x is an infinity.  Both are considered unlikely values, for
   branch prediction.  */

#if _GMP_IEEE_FLOATS
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
  do {                                          \
    union ieee_double_extract  u;               \
    u.d = (x);                                  \
    if (UNLIKELY (u.s.exp == 0x7FF))            \
      {                                         \
        if (u.s.manl == 0 && u.s.manh == 0)     \
          { a_inf; }                            \
        else                                    \
          { a_nan; }                            \
      }                                         \
  } while (0)
#endif

#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
/* no nans or infs in these formats */
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
  do { } while (0)
#endif

#ifndef DOUBLE_NAN_INF_ACTION
/* Unknown format, try something generic.
   NaN should be "unordered", so x!=x.
   Inf should be bigger than DBL_MAX.  */
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
  do {                                                          \
    if (UNLIKELY ((x) != (x)))                                  \
      { a_nan; }                                                \
    else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))        \
      { a_inf; }                                                \
  } while (0)
#endif
/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
   in the coprocessor, which means a bigger exponent range than normal, and
   depending on the rounding mode, a bigger mantissa than normal.  (See
   "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
   "d" through memory to force any rounding and overflows to occur.

   On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
   registers, where there's no such extra precision and no need for the
   FORCE_DOUBLE.  We don't bother to detect this since the present uses for
   FORCE_DOUBLE are only in test programs and default generic C code.

   Not quite sure that an "automatic volatile" will use memory, but it does
   in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
   apparently matching operands like "0" are only allowed on a register
   output.  gcc 3.4 warns about this, though in fact it and past versions
   seem to put the operand through memory as hoped.  */

#if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
     || defined (__amd64__))
#define FORCE_DOUBLE(d) \
  do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
#else
#define FORCE_DOUBLE(d)  do { } while (0)
#endif
extern int __gmp_junk;
extern const int __gmp_0;
void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code)   __gmp_exception (code)
#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */

/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#if GMP_NUMB_BITS == 2
#define PP 0x3                                  /* 3 */
#define PP_FIRST_OMITTED 5
#endif
#if GMP_NUMB_BITS == 4
#define PP 0xF                                  /* 3 x 5 */
#define PP_FIRST_OMITTED 7
#endif
#if GMP_NUMB_BITS == 8
#define PP 0x69                                 /* 3 x 5 x 7 */
#define PP_FIRST_OMITTED 11
#endif
#if GMP_NUMB_BITS == 16
#define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
#define PP_FIRST_OMITTED 17
#endif
#if GMP_NUMB_BITS == 32
#define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_FIRST_OMITTED 31
#endif
#if GMP_NUMB_BITS == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_FIRST_OMITTED 59
#endif
#ifndef PP_FIRST_OMITTED
#define PP_FIRST_OMITTED 3
#endif
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs.  */
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a)   ((a) == 1)

/* (a/0), with a given by low and size;
   is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
  (((asize) == 1 || (asize) == -1) && (alow) == 1)

/* (a/0), with a an mpz_t;
   fetch of low limb always valid, even if size is zero */
#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))

/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
#define JACOBI_0U(b)   ((b) == 1)

/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)

/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0LS(blow,bsize) \
  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((int) (result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  ((int) (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (-1/b), with b odd (signed or unsigned);
   is (-1)^((b-1)/2) */
#define JACOBI_N1B_BIT1(b) \
  ((int) (b))
/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1/b) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))

/* (a/b) effect due to sign of b: signed/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SS_BIT1(a, b) \
  ((((a)<0) & ((b)<0)) << 1)

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))

/* (a/b) effect due to sign of b: mpz/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1 (b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
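/* Worked example (illustrative): for a=3, b=7, both are 3 mod 4, so
   JACOBI_RECIP_UU_BIT1 (3, 7) = 3&7 = 3 has bit 1 set, and
   JACOBI_BIT1_TO_PN gives -1, matching the reciprocity rule
   (3/7) = -(7/3).  */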
/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
   updated for the new b_ptr.  result_bit1 is updated according to the
   factors of 2 stripped, as per (a/2).  */
#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
  do {                                                                  \
    ASSERT ((b_size) >= 1);                                             \
    ASSERT ((b_low) == (b_ptr)[0]);                                     \
                                                                        \
    while (UNLIKELY ((b_low) == 0))                                     \
      {                                                                 \
        (b_size)--;                                                     \
        ASSERT ((b_size) >= 1);                                         \
        (b_ptr)++;                                                      \
        (b_low) = *(b_ptr);                                             \
                                                                        \
        ASSERT (((a) & 1) != 0);                                        \
        if ((GMP_NUMB_BITS % 2) == 1)                                   \
          (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
      }                                                                 \
  } while (0)

/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
   factor to introduce into result_bit1, so for that case use mpn_mod_1
   unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */

#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do {                                                                     \
    mp_srcptr  __a_ptr = (a_ptr);                                          \
    mp_size_t  __a_size = (a_size);                                        \
    mp_limb_t  __b = (b);                                                  \
                                                                           \
    ASSERT (__a_size >= 1);                                                \
    ASSERT (__b & 1);                                                      \
                                                                           \
    if ((GMP_NUMB_BITS % 2) != 0                                           \
        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
      {                                                                    \
        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
      }                                                                    \
  } while (0)
/* Matrix multiplication */
#define   mpn_matrix22_mul __MPN(matrix22_mul)
void      mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
#define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
void      mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
#define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#ifndef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD 30
#endif
/* HGCD definitions */

/* Extract one numb, shifting count bits left
    ________  ________
   |___xh___||___xl___|
          |____r____|
   >count <

   The count includes any nail bits, so it should work fine if count
   is computed using count_leading_zeros.  If GMP_NAIL_BITS > 0, all of
   xh, xl and r include nail bits.  Must have 0 < count < GMP_LIMB_BITS.

   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
   those calls where the count high bits of xh may be non-zero.
*/

#define MPN_EXTRACT_NUMB(count, xh, xl)                         \
  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |      \
   ((xl) >> (GMP_LIMB_BITS - (count))))
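/* Worked example (illustrative, no nails): with GMP_LIMB_BITS == 32 and
   count == 8, MPN_EXTRACT_NUMB (8, 0x000000AB, 0xCDEF0123) gives
   (0xAB << 8) | (0xCDEF0123 >> 24) = 0x0000ABCD, i.e. the high limb of
   the two-limb quantity (xh:xl) shifted left by count bits.  */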
/* The matrix non-negative M = (u, u'; v,v') keeps track of the
   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
   than a, b.  The determinant must always be one, so that M has an
   inverse (v', -u'; -v, u).  Elements always fit in GMP_NUMB_BITS - 1
   bits.  */
struct hgcd_matrix1
{
  mp_limb_t u[2][2];
};

#define mpn_hgcd2 __MPN (hgcd2)
int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));

#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));

#define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));

struct hgcd_matrix
{
  mp_size_t alloc;              /* for sanity checking only */
  mp_size_t n;
  mp_ptr p[2][2];
};

#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3613 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3614 void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3616 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3617 void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3619 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3620 mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3622 #define mpn_hgcd_itch __MPN (hgcd_itch)
3623 mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3625 #define mpn_hgcd __MPN (hgcd)
3626 mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3628 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3630 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3631 mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
/* Needs storage for the quotient */
#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)

#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define MPN_GCD_LEHMER_N_ITCH(n) (n)

#define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));

#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)

#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

/* 4*(an + 1) + 4*(bn + 1) + an */
#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
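/* (Expansion check: 4*(an + 1) + 4*(bn + 1) + an = 5*an + 4*bn + 8,
   matching the definition above.) */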
#ifndef HGCD_THRESHOLD
#define HGCD_THRESHOLD 400
#endif

#ifndef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD 1000
#endif

#ifndef GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD 600
#endif
/* Definitions for mpn_set_str and mpn_get_str */

struct powers
{
  mp_ptr p;			/* actual power value */
  mp_size_t n;			/* # of limbs at p */
  mp_size_t shift;		/* weight of lowest limb, in limb base B */
  size_t digits_in_base;	/* number of corresponding digits */
  int base;
};
typedef struct powers powers_t;
#define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
#define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)

#define mpn_dc_set_str __MPN(dc_set_str)
mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
#define mpn_bc_set_str __MPN(bc_set_str)
mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
#define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   hence giving back the user's size in bits rounded up.  Notice that
   converting prec->bits->prec gives an unchanged value.  */
#define __GMPF_BITS_TO_PREC(n)						\
  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
#define __GMPF_PREC_TO_BITS(n)						\
  ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
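
/* Worked example, illustrative only, assuming 64-bit nail-free limbs so
   GMP_NUMB_BITS == 64: __GMPF_BITS_TO_PREC(100) = (100 + 127)/64 = 3 limbs,
   i.e. 2 limbs covering the requested 100 bits plus the extra limb.
   Converting back, __GMPF_PREC_TO_BITS(3) = 3*64 - 64 = 128, the user's
   100 bits rounded up to whole limbs; and __GMPF_BITS_TO_PREC(128)
   = (128 + 127)/64 = 3 again, so prec->bits->prec is indeed unchanged.  */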
extern mp_size_t __gmp_default_fp_limb_precision;
/* Set n to the number of significant digits in an mpf of the given
   _mp_prec field, in the given base.  This is a rounded-up value, designed
   to ensure there are enough digits to reproduce all the guaranteed part
   of the value.

   There are prec many limbs, but the high might be only "1" so forget it
   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   further +1 is because the limbs usually won't fall on digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   BITS_PER_MP_LIMB then the +2 is unnecessary.  This always happens for
   base==2, and in base==16 with the current 32 or 64 bit limb sizes.  */
#define MPF_SIGNIFICANT_DIGITS(n, base, prec)				\
  do {									\
    ASSERT (base >= 2 && base < numberof (mp_bases));			\
    (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)		\
			* mp_bases[(base)].chars_per_bit_exactly);	\
  } while (0)
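
/* Worked example, illustrative only: with 64-bit nail-free limbs and
   base 10 (chars_per_bit_exactly about 0.30103), an mpf with _mp_prec 3
   gives (3-1)*64 = 128 counted bits, 128 * 0.30103 truncates to 38 chars,
   so n = 2 + 38 = 40 significant digits.  */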
/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
   their respective #if HAVE_FOO_H.

   GLIBC recommends nl_langinfo because getting only one facet can be
   faster, apparently.  */

/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
#endif
/* RADIXCHAR is deprecated, still in unix98 or some such. */
#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
#endif
/* localeconv is slower since it returns all locale stuff */
#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
#endif
#if ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (".")
#endif
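
/* Illustrative note, not library code: GMP_DECIMAL_POINT is a string, not
   a single char, since a locale radix point can be multi-byte.  A consumer
   would fetch it once and take its length, e.g.

     const char *point = GMP_DECIMAL_POINT;
     size_t pointlen = strlen (point);  */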
#define DOPRNT_CONV_FIXED        1
#define DOPRNT_CONV_SCIENTIFIC   2
#define DOPRNT_CONV_GENERAL      3

#define DOPRNT_JUSTIFY_NONE      0
#define DOPRNT_JUSTIFY_LEFT      1
#define DOPRNT_JUSTIFY_RIGHT     2
#define DOPRNT_JUSTIFY_INTERNAL  3

#define DOPRNT_SHOWBASE_YES      1
#define DOPRNT_SHOWBASE_NO       2
#define DOPRNT_SHOWBASE_NONZERO  3
struct doprnt_params_t {
  int         base;          /* negative for upper case */
  int         conv;          /* choices above */
  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* exponent multiply by 4 */
  char        fill;          /* character */
  int         justify;       /* choices above */
  int         prec;          /* prec field, or -1 for all digits */
  int         showbase;      /* choices above */
  int         showpoint;     /* if radix point always shown */
  int         showtrailing;  /* if trailing zeros wanted */
  char        sign;          /* '+', ' ', or '\0' */
  int         width;         /* width field */
};
#if _GMP_H_HAVE_VA_LIST

typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
typedef int (*doprnt_reps_t)   __GMP_PROTO ((void *, int, int));
typedef int (*doprnt_final_t)  __GMP_PROTO ((void *));

struct doprnt_funs_t {
  doprnt_format_t  format;
  doprnt_memory_t  memory;
  doprnt_reps_t    reps;
  doprnt_final_t   final;   /* NULL if not required */
};
extern const struct doprnt_funs_t  __gmp_fprintf_funs;
extern const struct doprnt_funs_t  __gmp_sprintf_funs;
extern const struct doprnt_funs_t  __gmp_snprintf_funs;
extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
extern const struct doprnt_funs_t  __gmp_ostream_funs;
/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   "size" of these have been written.  "alloc > size" is maintained, so
   there's room to store a '\0' at the end.  "result" is where the
   application wants the final block pointer.  */
struct gmp_asprintf_t {
  char    **result;
  char    *buf;
  size_t  size;
  size_t  alloc;
};

#define GMP_ASPRINTF_T_INIT(d, output)				\
  do {								\
    (d).result = (output);					\
    (d).alloc = 256;						\
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);	\
    (d).size = 0;						\
  } while (0)
/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs.  */
#define GMP_ASPRINTF_T_NEED(d, n)					\
  do {									\
    size_t  alloc, newsize, newalloc;					\
    ASSERT ((d)->alloc >= (d)->size + 1);				\
									\
    alloc = (d)->alloc;							\
    newsize = (d)->size + (n);						\
    if (alloc <= newsize)						\
      {									\
	newalloc = 2*newsize;						\
	(d)->alloc = newalloc;						\
	(d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,		\
					       alloc, newalloc, char);	\
      }									\
  } while (0)
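
/* Illustrative sketch, not library code: the intended pattern for using
   the accumulator above.  "result" and the appended literal are
   hypothetical; __gmp_asprintf_final (declared below) terminates the
   string and hands the block back through the result pointer.

     char *result;
     struct gmp_asprintf_t  d;
     GMP_ASPRINTF_T_INIT (d, &result);
     GMP_ASPRINTF_T_NEED (&d, 5);
     memcpy (d.buf + d.size, "hello", 5);
     d.size += 5;
     __gmp_asprintf_final (&d);  */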
__GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
__GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
__GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written.  */
struct gmp_snprintf_t {
  char    *buf;
  size_t  size;
};
/* Add the bytes printed by the call to the total retval, or bail out on an
   error.  */
#define DOPRNT_ACCUMULATE(call)		\
  do {					\
    int  __ret;				\
    __ret = call;			\
    if (__ret == -1)			\
      goto error;			\
    retval += __ret;			\
  } while (0)

#define DOPRNT_ACCUMULATE_FUN(fun, params)	\
  do {						\
    ASSERT ((fun) != NULL);			\
    DOPRNT_ACCUMULATE ((*(fun)) params);	\
  } while (0)
#define DOPRNT_FORMAT(fmt, ap)				\
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len)				\
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n)				\
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str)  DOPRNT_MEMORY (str, strlen (str))
#define DOPRNT_REPS_MAYBE(c, n)		\
  do {					\
    if ((n) != 0)			\
      DOPRNT_REPS (c, n);		\
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len)	\
  do {					\
    if ((len) != 0)			\
      DOPRNT_MEMORY (ptr, len);		\
  } while (0)
__GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
__GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));

#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
__GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));

int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
#endif /* _GMP_H_HAVE_VA_LIST */
typedef int (*gmp_doscan_scan_t)  __GMP_PROTO ((void *, const char *, ...));
typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
typedef int (*gmp_doscan_get_t)   __GMP_PROTO ((void *));
typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));

struct gmp_doscan_funs_t {
  gmp_doscan_scan_t   scan;
  gmp_doscan_step_t   step;
  gmp_doscan_get_t    get;
  gmp_doscan_unget_t  unget;
};
extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;

#if _GMP_H_HAVE_VA_LIST
int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *,
			       const char *, va_list));
#endif
/* For testing and debugging.  */
#define MPZ_CHECK_FORMAT(z)					\
  do {								\
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);	\
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));			\
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));			\
  } while (0)
#define MPQ_CHECK_FORMAT(q)				\
  do {							\
    MPZ_CHECK_FORMAT (mpq_numref (q));			\
    MPZ_CHECK_FORMAT (mpq_denref (q));			\
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);		\
							\
    if (SIZ(mpq_numref(q)) == 0)			\
      {							\
	/* should have zero as 0/1 */			\
	ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1		\
		       && PTR(mpq_denref(q))[0] == 1);	\
      }							\
    else						\
      {							\
	/* should have no common factors */		\
	mpz_t  g;					\
	mpz_init (g);					\
	mpz_gcd (g, mpq_numref(q), mpq_denref(q));	\
	ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);		\
	mpz_clear (g);					\
      }							\
  } while (0)
#define MPF_CHECK_FORMAT(f)					\
  do {								\
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC (53));	\
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);			\
    if (SIZ(f) == 0)						\
      ASSERT_ALWAYS (EXP(f) == 0);				\
    if (SIZ(f) != 0)						\
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);		\
  } while (0)
#define MPZ_PROVOKE_REALLOC(z)					\
  do { ALLOC(z) = ABSIZ(z); } while (0)
/* Enhancement: The "mod" and "gcd_1" functions below could have
   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
   function pointers, only actual functions.  It probably doesn't make much
   difference to the gmp code, since hopefully we arrange calls so there's
   no great need for the compiler to move things around.  */

#if WANT_FAT_BINARY && HAVE_HOST_CPU_FAMILY_x86
/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
   in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
struct cpuvec_t {
  DECL_add_n           ((*add_n));
  DECL_addmul_1        ((*addmul_1));
  DECL_copyd           ((*copyd));
  DECL_copyi           ((*copyi));
  DECL_divexact_1      ((*divexact_1));
  DECL_divexact_by3c   ((*divexact_by3c));
  DECL_divrem_1        ((*divrem_1));
  DECL_gcd_1           ((*gcd_1));
  DECL_lshift          ((*lshift));
  DECL_mod_1           ((*mod_1));
  DECL_mod_34lsub1     ((*mod_34lsub1));
  DECL_modexact_1c_odd ((*modexact_1c_odd));
  DECL_mul_1           ((*mul_1));
  DECL_mul_basecase    ((*mul_basecase));
  DECL_preinv_divrem_1 ((*preinv_divrem_1));
  DECL_preinv_mod_1    ((*preinv_mod_1));
  DECL_rshift          ((*rshift));
  DECL_sqr_basecase    ((*sqr_basecase));
  DECL_sub_n           ((*sub_n));
  DECL_submul_1        ((*submul_1));
  int        initialized;
  mp_size_t  mul_karatsuba_threshold;
  mp_size_t  mul_toom3_threshold;
  mp_size_t  sqr_karatsuba_threshold;
  mp_size_t  sqr_toom3_threshold;
};
__GMP_DECLSPEC extern struct cpuvec_t  __gmpn_cpuvec;
#endif /* x86 fat binary */

void __gmpn_cpuvec_init __GMP_PROTO ((void));
/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
   if that hasn't yet been done (to establish the right values).  */
#define CPUVEC_THRESHOLD(field)						      \
  ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),    \
   __gmpn_cpuvec.field)
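
/* Illustrative use, not library code: a size cutoff in a fat-binary build
   might read

     if (n < CPUVEC_THRESHOLD (mul_karatsuba_threshold))
       ... use the basecase ...

   where the comma expression in the macro guarantees __gmpn_cpuvec_init
   has run before the field is fetched.  */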
#if HAVE_NATIVE_mpn_add_nc
#define mpn_add_nc __MPN(add_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#else
/* Fallback: rp = up + vp + ci, returning the carry out.  */
static inline mp_limb_t
mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
{
  mp_limb_t co;
  co = mpn_add_n (rp, up, vp, n);
  co += mpn_add_1 (rp, rp, n, ci);
  return co;
}
#endif
#if HAVE_NATIVE_mpn_sub_nc
#define mpn_sub_nc __MPN(sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#else
/* Fallback: rp = up - vp - ci, returning the borrow out.  */
static inline mp_limb_t
mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
{
  mp_limb_t co;
  co = mpn_sub_n (rp, up, vp, n);
  co += mpn_sub_1 (rp, rp, n, ci);
  return co;
}
#endif
/* Return non-zero iff {ap,n} is zero.  */
static inline int
mpn_zero_p (mp_srcptr ap, mp_size_t n)
{
  mp_size_t i;
  for (i = n - 1; i >= 0; i--)
    {
      if (ap[i] != 0)
	return 0;
    }
  return 1;
}
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.

   It's necessary to keep these thresholds as #defines (each simply
   expanding to an identically named variable), since various defaults are
   established based on #ifdef in the .c files.  For some this is not so
   (the defaults are instead established above), but all are done this way
   for consistency.  */
#undef  MUL_KARATSUBA_THRESHOLD
#define MUL_KARATSUBA_THRESHOLD  mul_karatsuba_threshold
extern mp_size_t  mul_karatsuba_threshold;

#undef  MUL_TOOM3_THRESHOLD
#define MUL_TOOM3_THRESHOLD  mul_toom3_threshold
extern mp_size_t  mul_toom3_threshold;

#undef  MUL_TOOM44_THRESHOLD
#define MUL_TOOM44_THRESHOLD  mul_toom44_threshold
extern mp_size_t  mul_toom44_threshold;

#undef  MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD  mul_fft_threshold
extern mp_size_t  mul_fft_threshold;

#undef  MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD  mul_fft_modf_threshold
extern mp_size_t  mul_fft_modf_threshold;

#undef  MUL_FFT_TABLE
#define MUL_FFT_TABLE  { 0 }
/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
   remain as zero (always use it).  */
#if ! HAVE_NATIVE_mpn_sqr_basecase
#undef  SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD  sqr_basecase_threshold
extern mp_size_t  sqr_basecase_threshold;
#endif
#if TUNE_PROGRAM_BUILD_SQR
#undef  SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD  SQR_KARATSUBA_MAX_GENERIC
#else
#undef  SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD  sqr_karatsuba_threshold
extern mp_size_t  sqr_karatsuba_threshold;
#endif
#undef  SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD  sqr_toom3_threshold
extern mp_size_t  sqr_toom3_threshold;

#undef  SQR_TOOM4_THRESHOLD
#define SQR_TOOM4_THRESHOLD  sqr_toom4_threshold
extern mp_size_t  sqr_toom4_threshold;

#undef  SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD  sqr_fft_threshold
extern mp_size_t  sqr_fft_threshold;

#undef  SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD  sqr_fft_modf_threshold
extern mp_size_t  sqr_fft_modf_threshold;

#undef  SQR_FFT_TABLE
#define SQR_FFT_TABLE  { 0 }

#undef  MULLOW_BASECASE_THRESHOLD
#define MULLOW_BASECASE_THRESHOLD  mullow_basecase_threshold
extern mp_size_t  mullow_basecase_threshold;

#undef  MULLOW_DC_THRESHOLD
#define MULLOW_DC_THRESHOLD  mullow_dc_threshold
extern mp_size_t  mullow_dc_threshold;

#undef  MULLOW_MUL_N_THRESHOLD
#define MULLOW_MUL_N_THRESHOLD  mullow_mul_n_threshold
extern mp_size_t  mullow_mul_n_threshold;
#if ! UDIV_PREINV_ALWAYS
#undef  DIV_SB_PREINV_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD  div_sb_preinv_threshold
extern mp_size_t  div_sb_preinv_threshold;
#endif
#undef  DIV_DC_THRESHOLD
#define DIV_DC_THRESHOLD  div_dc_threshold
extern mp_size_t  div_dc_threshold;

#undef  POWM_THRESHOLD
#define POWM_THRESHOLD  powm_threshold
extern mp_size_t  powm_threshold;

#undef  MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD  matrix22_strassen_threshold
extern mp_size_t  matrix22_strassen_threshold;

#undef  HGCD_THRESHOLD
#define HGCD_THRESHOLD  hgcd_threshold
extern mp_size_t  hgcd_threshold;

#undef  GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD  gcd_accel_threshold
extern mp_size_t  gcd_accel_threshold;

#undef  GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD  gcd_dc_threshold
extern mp_size_t  gcd_dc_threshold;

#undef  GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD  gcdext_dc_threshold
extern mp_size_t  gcdext_dc_threshold;

#undef  DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  divrem_1_norm_threshold
extern mp_size_t  divrem_1_norm_threshold;

#undef  DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  divrem_1_unnorm_threshold
extern mp_size_t  divrem_1_unnorm_threshold;

#undef  MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  mod_1_norm_threshold
extern mp_size_t  mod_1_norm_threshold;

#undef  MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  mod_1_unnorm_threshold
extern mp_size_t  mod_1_unnorm_threshold;

#undef  MOD_1_1_THRESHOLD
#define MOD_1_1_THRESHOLD  mod_1_1_threshold
extern mp_size_t  mod_1_1_threshold;

#undef  MOD_1_2_THRESHOLD
#define MOD_1_2_THRESHOLD  mod_1_2_threshold
extern mp_size_t  mod_1_2_threshold;

#undef  MOD_1_3_THRESHOLD
#define MOD_1_3_THRESHOLD  mod_1_3_threshold
extern mp_size_t  mod_1_3_threshold;

#undef  MOD_1_4_THRESHOLD
#define MOD_1_4_THRESHOLD  mod_1_4_threshold
extern mp_size_t  mod_1_4_threshold;
#if ! UDIV_PREINV_ALWAYS
#undef  DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD  divrem_2_threshold
extern mp_size_t  divrem_2_threshold;
#endif
#undef  GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD  get_str_dc_threshold
extern mp_size_t  get_str_dc_threshold;

#undef  GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD  get_str_precompute_threshold
extern mp_size_t  get_str_precompute_threshold;

#undef  SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD  set_str_dc_threshold
extern mp_size_t  set_str_dc_threshold;

#undef  SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD  set_str_precompute_threshold
extern mp_size_t  set_str_precompute_threshold;

#undef  SET_STR_THRESHOLD
#define SET_STR_THRESHOLD  set_str_threshold
extern mp_size_t  set_str_threshold;
#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
/* Sizes the tune program tests up to, used in a couple of recompilations. */
#undef MUL_KARATSUBA_THRESHOLD_LIMIT
#undef MUL_TOOM3_THRESHOLD_LIMIT
#undef MULLOW_BASECASE_THRESHOLD_LIMIT
#undef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_KARATSUBA_MAX_GENERIC        200
#define MUL_KARATSUBA_THRESHOLD_LIMIT    700
#define MUL_TOOM3_THRESHOLD_LIMIT        700
#define SQR_TOOM3_THRESHOLD_LIMIT        400
#define MUL_TOOM44_THRESHOLD_LIMIT      1000
#define SQR_TOOM4_THRESHOLD_LIMIT       1000
#define MULLOW_BASECASE_THRESHOLD_LIMIT  200
#define GET_STR_THRESHOLD_LIMIT          150
/* "thresh" will normally be a variable when tuning, so use the cached
   result.  This helps mpn_sb_divrem_mn for instance.  */
#undef  CACHED_ABOVE_THRESHOLD
#define CACHED_ABOVE_THRESHOLD(cache, thresh)  (cache)
#undef  CACHED_BELOW_THRESHOLD
#define CACHED_BELOW_THRESHOLD(cache, thresh)  (cache)

#endif /* TUNE_PROGRAM_BUILD */
#if defined (__cplusplus)

#include <iostream>
#include <string>
#include <cstring>  /* for strlen */
/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.
   The len field is needed by the destructor, and can be used by anyone else
   to avoid a second strlen pass over the data.

   Since our input is a C string, using strlen is correct.  Perhaps it'd be
   more C++-ish style to use std::char_traits<char>::length, but char_traits
   isn't available in gcc 2.95.4.  */
class gmp_allocated_string {
 public:
  char *str;
  size_t len;
  gmp_allocated_string(char *arg)
  {
    str = arg;
    len = std::strlen (str);
  }
  ~gmp_allocated_string()
  {
    (*__gmp_free_func) (str, len+1);
  }
};
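
/* Illustrative use, not library code: wrapping an mpz_get_str allocation
   (made through the current allocation function by passing NULL) so the
   block is released even if the stream insertion throws.  The helper name
   is hypothetical.

     std::ostream &
     example_out (std::ostream &o, mpz_srcptr z)
     {
       gmp_allocated_string  t (mpz_get_str (NULL, 10, z));
       return o << t.str;
     }
*/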
std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */