1 /* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn. Or more
2 accurately, bn <= an < 2bn.
4 Contributed to the GNU project by Torbjorn Granlund.
6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
31 /* Evaluate in: -1, 0, +inf
39 v0 = a0 * b0 # A(0)*B(0)
40 vm1 = (a0- a1)*(b0- b1) # A(-1)*B(-1)
41 vinf= a1 * b1 # A(inf)*B(inf)
44 #if TUNE_PROGRAM_BUILD
45 #define MAYBE_mul_toom22 1
47 #define MAYBE_mul_toom22 \
48 (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
51 #define TOOM22_MUL_N_REC(p, a, b, n, ws) \
53 if (! MAYBE_mul_toom22 \
54 || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
55 mpn_mul_basecase (p, a, n, b, n); \
57 mpn_toom22_mul (p, a, n, b, n, ws); \
60 /* Normally, this calls mul_basecase or toom22_mul. But when when the fraction
61 MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
62 relative unbalance will become a larger and larger relative unbalance with
63 each recursion (the difference s-t will be invariant over recursive calls).
64 Therefore, we need to call toom32_mul. FIXME: Suppress depending on
65 MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */
66 #define TOOM22_MUL_REC(p, a, an, b, bn, ws) \
68 if (! MAYBE_mul_toom22 \
69 || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \
70 mpn_mul_basecase (p, a, an, b, bn); \
71 else if (4 * an < 5 * bn) \
72 mpn_toom22_mul (p, a, an, b, bn, ws); \
74 mpn_toom32_mul (p, a, an, b, bn, ws); \
78 mpn_toom22_mul (mp_ptr pp,
79 mp_srcptr ap, mp_size_t an,
80 mp_srcptr bp, mp_size_t bn,
100 ASSERT (0 < s && s <= n);
101 ASSERT (0 < t && t <= s);
111 if (mpn_cmp (a0, a1, n) < 0)
113 mpn_sub_n (asm1, a1, a0, n);
118 mpn_sub_n (asm1, a0, a1, n);
123 if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
125 mpn_sub_n (asm1, a1, a0, s);
126 MPN_ZERO (asm1 + s, n - s);
131 mpn_sub (asm1, a0, n, a1, s);
138 if (mpn_cmp (b0, b1, n) < 0)
140 mpn_sub_n (bsm1, b1, b0, n);
145 mpn_sub_n (bsm1, b0, b1, n);
150 if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
152 mpn_sub_n (bsm1, b1, b0, t);
153 MPN_ZERO (bsm1 + t, n - t);
158 mpn_sub (bsm1, b0, n, b1, t);
162 #define v0 pp /* 2n */
163 #define vinf (pp + 2 * n) /* s+t */
164 #define vm1 scratch /* 2n */
165 #define scratch_out scratch + 2 * n
168 TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
170 if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
171 else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
174 TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
176 /* H(v0) + L(vinf) */
177 cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
180 cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
182 /* L(vinf) + H(vinf) */
183 cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
186 cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
188 cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
190 ASSERT (cy + 1 <= 3);
193 mpn_incr_u (pp + 2 * n, cy2);
194 if (LIKELY (cy <= 2))
195 mpn_incr_u (pp + 3 * n, cy);
197 mpn_decr_u (pp + 3 * n, 1);