3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
10 This file is part of the GNU MP Library.
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
17 The GNU MP Library is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
20 License for more details.
22 You should have received a copy of the GNU Lesser General Public License
23 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
29 #if GMP_NAIL_BITS == 0
31 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
34 /* Single-limb division optimized for small quotients. */
/* NOTE(review): this listing is incomplete -- div1's parameter list and
   most of its body are not visible here, so the comments below cover
   only the lines that are shown.  Confirm against the full source. */
35 static inline mp_limb_t
/* Numerator n0 already has its high bit set: shift the divisor d0 left
   until its high bit is set too, counting the shifts (cnt starts at 1). */
42 if ((mp_limb_signed_t) n0 < 0)
45 for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
/* High bit of n0 clear: count shifts while d0 still fits under n0
   (cnt starts at 0); presumably d0 is doubled in the loop body, which
   is not visible here. */
66 for (cnt = 0; n0 >= d0; cnt++)
88 /* Two-limb division optimized for small quotients. */
/* NOTE(review): fragmentary listing -- the function name line, local
   declarations, and the return statement are not visible here. */
89 static inline mp_limb_t
91 mp_limb_t nh, mp_limb_t nl,
92 mp_limb_t dh, mp_limb_t dl)
/* Case 1: numerator's top bit already set.  Normalize the divisor by a
   double-limb left shift until its top bit is set as well, counting the
   shifts (cnt starts at 1). */
96 if ((mp_limb_signed_t) nh < 0)
99 for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
101 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
/* Shift-and-subtract step: if (nh,nl) >= (dh,dl), subtract the shifted
   divisor, then shift the divisor back down one bit. */
108 if (nh > dh || (nh == dh && nl >= dl))
110 sub_ddmmss (nh, nl, nh, nl, dh, dl);
113 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
/* Case 2: top bit of nh clear.  Shift the divisor up while it is still
   <= the numerator, counting the shifts (cnt starts at 0). */
121 for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
123 dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
/* Undo one shift, then perform a final conditional subtraction. */
129 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
132 if (nh > dh || (nh == dh && nl >= dl))
134 sub_ddmmss (nh, nl, nh, nl, dh, dl);
148 /* This div2 uses less branches, but it seems to nevertheless be
149 slightly slower than the above code. */
/* NOTE(review): alternative div2 implementation kept for reference;
   only fragments of its body are visible in this listing. */
150 static inline mp_limb_t
152 mp_limb_t nh, mp_limb_t nl,
153 mp_limb_t dh, mp_limb_t dl)
/* Normalize both operands using leading-zero counts instead of per-bit
   test loops -- this is what removes the data-dependent branches. */
159 count_leading_zeros (ncnt, nh);
160 count_leading_zeros (dcnt, dh);
/* Double-limb left shift of the divisor by dcnt.  The (-(dcnt > 0) & ...)
   mask avoids the undefined shift (dl >> GMP_LIMB_BITS) when dcnt == 0. */
163 dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
/* Equal high limbs need special handling; the contents of this branch
   are not visible in this listing. */
170 if (UNLIKELY (nh == dh))
/* Branch-free conditional subtract: assuming bit is 0 or 1, (-bit)
   is an all-ones or all-zeros mask selecting whether the divisor is
   subtracted. */
177 sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);
179 dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
191 #else /* GMP_NAIL_BITS != 0 */
192 /* Check all functions for nail support. */
193 /* hgcd2 should be defined to take inputs including nail bits, and
194 produce a matrix with elements also including nail bits. This is
195 necessary, for the matrix elements to be useful with mpn_mul_1,
196 mpn_addmul_1 and friends. */
197 #error Not implemented
198 #endif /* GMP_NAIL_BITS != 0 */
200 /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
201 matrix M. Returns 1 if we make progress, i.e. can perform at least
202 one subtraction. Otherwise returns zero. */
204 /* FIXME: Possible optimizations:
206 The div2 function starts with checking the most significant bit of
207 the numerator. We can maintain normalized operands here, call
208 hgcd with normalized operands only, which should make the code
209 simpler and possibly faster.
211 Experiment with table lookups on the most significant bits.
213 This function is also a candidate for assembler implementation.
/* NOTE(review): gappy listing -- the return type line, several braces,
   matrix-update statements and return statements of mpn_hgcd2 are not
   visible here; comments describe only the visible lines. */
216 mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
217 struct hgcd_matrix1 *M)
/* u00..u11 accumulate the 2x2 matrix built from the quotient sequence;
   they are stored into *M at the end of the function. */
219 mp_limb_t u00, u01, u10, u11;
/* Both operands must have at least 2 in the high limb; the handling of
   the failure case (presumably return 0) is not visible here. */
221 if (ah < 2 || bh < 2)
/* Double-limb comparison a > b, followed by one subtraction of the
   smaller from the larger. */
224 if (ah > bh || (ah == bh && al > bl))
226 sub_ddmmss (ah, al, ah, al, bh, bl);
235 sub_ddmmss (bh, bl, bh, bl, ah, al);
/* Two-limb loop, a-step: once ah drops below half a limb, both
   operands are shifted up by half a limb (discarding the low half of
   the low limbs) and control drops to the single-precision loop. */
252 if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
254 ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
255 bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
260 /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
261 1), affecting the second column of M. */
/* Fast path: quotient is 1, a single subtraction suffices. */
263 sub_ddmmss (ah, al, ah, al, bh, bl);
/* General path: full two-limb division; r[] receives the remainder. */
277 mp_limb_t q = div2 (r, ah, al, bh, bl);
278 al = r[0]; ah = r[1];
281 /* A is too small, but q is correct. */
/* Two-limb loop, b-step: symmetric to the a-step above. */
295 if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
297 ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
298 bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
303 /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
304 1), affecting the first column of M. */
305 sub_ddmmss (bh, bl, bh, bl, ah, al);
319 mp_limb_t q = div2 (r, bh, bl, ah, al);
320 bl = r[0]; bh = r[1];
323 /* B is too small, but q is correct. */
334 /* NOTE: Since we discard the least significant half limb, we don't
335 get a truly maximal M (corresponding to |a - b| <
336 2^{GMP_LIMB_BITS +1}). */
337 /* Single precision loop */
/* Same quotient logic as above, now on single limbs (div1); the loop
   ends when a value drops below 2^(GMP_LIMB_BITS/2 + 1). */
345 if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
357 mp_limb_t q = div1 (&r, ah, bh);
359 if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
361 /* A is too small, but q is correct. */
376 if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
388 mp_limb_t q = div1 (&r, bh, ah);
390 if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
392 /* B is too small, but q is correct. */
/* Export the accumulated matrix to the caller. */
404 M->u[0][0] = u00; M->u[0][1] = u01;
405 M->u[1][0] = u10; M->u[1][1] = u11;
410 /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
411 * have space for n + 1 limbs. Uses three buffers to avoid a copy. */
413 mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
414 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
418 /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
/* Fast path: a native fused primitive computes each linear combination
   in a single pass, returning the carry-out limb. */
426 #if HAVE_NATIVE_mpn_addaddmul_1msb0
427 ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
428 bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
/* Generic path: mul_1 then addmul_1; ah and bh accumulate the high
   (carry-out) limbs of the two combinations. */
430 ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
431 ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
433 bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
434 bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
/* NOTE(review): the #else/#endif lines and the code that stores ah/bh
   as the (n+1)-th limbs are not visible in this listing. */
443 /* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
444 the left. Uses three buffers, to avoid a copy. */
/* NOTE(review): this definition continues past the end of this listing
   (the return statement and closing brace are not visible). */
446 mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
447 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
451 /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
/* r = u11 * a - u01 * b; h0 is the high limb of the product, h1 the
   borrow out of the submul -- presumably checked against each other in
   lines not shown here. */
459 h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
460 h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
/* b = u00 * b - u10 * a, same multiply/submul pattern. */
463 h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
464 h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
/* If the top limbs of both results are zero, shrink the common size by
   one limb. */
467 n -= (rp[n-1] | bp[n-1]) == 0;