1 /* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/2
2 times as large as bn. Or more accurately, 2 bn < an < 5 bn.
4 Contributed to the GNU project by Marco Bodrato.
6 The idea of applying toom to unbalanced multiplication is due to Marco
7 Bodrato and Alberto Zanoni.
9 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
10 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
11 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
13 Copyright 2009 Free Software Foundation, Inc.
15 This file is part of the GNU MP Library.
17 The GNU MP Library is free software; you can redistribute it and/or modify
18 it under the terms of the GNU Lesser General Public License as published by
19 the Free Software Foundation; either version 3 of the License, or (at your
20 option) any later version.
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
25 License for more details.
27 You should have received a copy of the GNU Lesser General Public License
28 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
34 /* Evaluate in: -2, -1, 0, +1, +2, +inf
36 <-s-><--n--><--n--><--n--><--n-->
37 ___ ______ ______ ______ ______
38 |a4_|___a3_|___a2_|___a1_|___a0_|
42 v0 = a0 * b0 # A(0)*B(0)
43 v1 = (a0+ a1+ a2+ a3+ a4)*(b0+ b1) # A(1)*B(1) ah <= 4 bh <= 1
44 vm1 = (a0- a1+ a2- a3+ a4)*(b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
45 v2 = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) # A(2)*B(2) ah <= 30 bh <= 2
46 vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) # A(-2)*B(-2) |ah| <= 20 |bh|<= 1
47 vinf= a4 * b1 # A(inf)*B(inf)
49 Some slight optimization in evaluation are taken from the paper:
50 "Towards Optimal Toom-Cook Multiplication for Univariate and
51 Multivariate Polynomials in Characteristic 2 and 0."
/* Toom-(5,2) unbalanced multiplication: A(x) is split into 5 blocks
   a0..a4 of n limbs each (a4 holds the s high limbs), B(x) into 2 blocks
   b0,b1 (b1 holds the t high limbs).  The full product is reconstructed
   by mpn_toom_interpolate_6pts from the 6 evaluations listed in the
   header comment: 0, +1, -1, +2, -2 and infinity.

   NOTE(review): this fragment is visibly truncated -- the return type and
   opening brace, the declarations of n, s, t and cy, the b0/b1 defines,
   several else-branches, #endif lines, closing braces, and the final `}`
   of the function are missing from this view.  Confirm every structural
   comment below against the complete file before relying on it. */
55 mpn_toom52_mul (mp_ptr pp,
56 mp_srcptr ap, mp_size_t an,
57 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
/* Sign bookkeeping for the odd-point evaluations: records whether the
   A(-1)*B(-1) and A(-2)*B(-2) products came out negative, so the
   interpolation step can compensate. */
60 enum toom6_flags flags;
/* a0..a4 are n-limb windows into {ap,an}; a4 is only s limbs long. */
64 #define a2 (ap + 2 * n)
65 #define a3 (ap + 3 * n)
66 #define a4 (ap + 4 * n)
/* Block size n = max(ceil(an/5), ceil(bn/2)), chosen so both operands
   fit their 5-way resp. 2-way split. */
70 n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
/* s and t (presumably an - 4n and bn - n; declarations not visible here)
   are the sizes of the top blocks a4 and b1 -- both must be nonzero. */
75 ASSERT (0 < s && s <= n);
76 ASSERT (0 < t && t <= n);
78 /* Ensures that 5 values of n+1 limbs each fits in the product area.
79 Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
/* Buffer layout: the v* buffers receive the 2n+1-limb pointwise
   products; as*/bs* hold the n+1-limb evaluated operands.  Several of
   them alias pp and are consumed before that part of pp is overwritten,
   so the multiplication order further down is significant. */
83 #define v0 pp /* 2n */
84 #define vm1 (scratch) /* 2n+1 */
85 #define v1 (pp + 2 * n) /* 2n+1 */
86 #define vm2 (scratch + 2 * n + 1) /* 2n+1 */
87 #define v2 (scratch + 4 * n + 2) /* 2n+1 */
88 #define vinf (pp + 5 * n) /* s+t */
89 #define bs1 pp /* n+1 */
90 #define bsm1 (scratch + 2 * n + 2) /* n */
91 #define asm1 (scratch + 3 * n + 3) /* n+1 */
92 #define asm2 (scratch + 4 * n + 4) /* n+1 */
93 #define bsm2 (pp + n + 1) /* n+1 */
94 #define bs2 (pp + 2 * n + 2) /* n+1 */
95 #define as2 (pp + 3 * n + 3) /* n+1 */
96 #define as1 (pp + 4 * n + 4) /* n+1 */
98 /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
99 products will overwrite 2n+2 limbs. */
104 /* Compute as2 and asm2. */
/* mpn_toom_eval_pm2 evaluates A(+2) and A(-2); its return value flags a
   negative A(-2), which is masked into the vm2 sign bit here. */
105 flags = toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3);
107 /* Compute bs1 and bsm1. */
/* Three code paths follow (the joining #else/#endif lines and the t == n
   vs. t < n branch structure are not all visible in this fragment):
   a fused add/sub when the native primitive exists, else separate
   add/sub for t == n, else the general t < n case with zero padding.
   bsm1 = |b0 - b1|; a negative b0 - b1 toggles the vm1 sign flag. */
110 #if HAVE_NATIVE_mpn_add_n_sub_n
113 if (mpn_cmp (b0, b1, n) < 0)
/* b1 > b0: compute b1 +/- b0 instead and record the sign flip. */
115 cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
116 flags ^= toom6_vm1_neg;
120 cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
/* Non-native path, t == n case: bs1 = b0 + b1 with carry in bs1[n]. */
124 bs1[n] = mpn_add_n (bs1, b0, b1, n);
125 if (mpn_cmp (b0, b1, n) < 0)
127 mpn_sub_n (bsm1, b1, b0, n);
128 flags ^= toom6_vm1_neg;
132 mpn_sub_n (bsm1, b0, b1, n);
/* General t < n case: b1 is shorter, so pad the difference with zeros
   when b1 turns out to be the larger operand. */
138 bs1[n] = mpn_add (bs1, b0, n, b1, t);
139 if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
141 mpn_sub_n (bsm1, b1, b0, t);
142 MPN_ZERO (bsm1 + t, n - t);
143 flags ^= toom6_vm1_neg;
147 mpn_sub (bsm1, b0, n, b1, t);
151 /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1 */
152 mpn_add (bs2, bs1, n+1, b1, t);
/* If bsm1 already represents -(b0 - b1), then bsm2 = -(bsm1 + b1) and
   the vm2 sign flag is toggled; otherwise subtract b1 from bsm1, again
   tracking a possible sign change. */
153 if (flags & toom6_vm1_neg )
155 bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
156 flags ^= toom6_vm2_neg;
/* (t == n branch of bsm2 = bsm1 - b1; the enclosing else/#if structure
   is not visible in this fragment.) */
163 if (mpn_cmp (bsm1, b1, n) < 0)
165 mpn_sub_n (bsm2, b1, bsm1, n);
166 flags ^= toom6_vm2_neg;
170 mpn_sub_n (bsm2, bsm1, b1, n);
/* t < n branch: compare only the t significant limbs, zero-pad above. */
175 if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
177 mpn_sub_n (bsm2, b1, bsm1, t);
178 MPN_ZERO (bsm2 + t, n - t);
179 flags ^= toom6_vm2_neg;
183 mpn_sub (bsm2, bsm1, n, b1, t);
188 /* Compute as1 and asm1. */
/* mpn_toom_eval_pm1 evaluates A(+1) and A(-1); a negative A(-1) is
   xor-ed into the vm1 sign flag (bsm1's sign is already folded in). */
189 flags ^= toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2);
/* Carry-limb bounds -- these match the ah/bh bounds tabulated in the
   header comment for each evaluation point. */
191 ASSERT (as1[n] <= 4);
192 ASSERT (bs1[n] <= 1);
193 ASSERT (asm1[n] <= 2);
194 /* ASSERT (bsm1[n] <= 1); */
195 ASSERT (as2[n] <=30);
196 ASSERT (bs2[n] <= 2);
197 ASSERT (asm2[n] <= 20);
198 ASSERT (bsm2[n] <= 1);
/* Pointwise products.  Order matters: the evaluations stored inside pp
   (bs1, bsm2, bs2, as2, as1) must be consumed before the products v1,
   vinf and v0 overwrite those regions of pp.  bsm1 is only n limbs, so
   vm1 uses the unbalanced mpn_mul. */
200 /* vm1, 2n+1 limbs */
201 mpn_mul (vm1, asm1, n+1, bsm1, n); /* W4 */
203 /* vm2, 2n+1 limbs */
204 mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */
207 mpn_mul_n (v2, as2, bs2, n+1); /* W1 */
210 mpn_mul_n (v1, as1, bs1, n+1); /* W3 */
212 /* vinf, s+t limbs */ /* W0 */
/* mpn_mul requires the first operand to be at least as long. */
213 if (s > t) mpn_mul (vinf, a4, s, b1, t);
214 else mpn_mul (vinf, b1, t, a4, s);
/* v0 last: it lands in pp's low limbs, clobbering bs1. */
217 mpn_mul_n (v0, ap, bp, n); /* W5 */
/* Recombine the six 2n+1-limb evaluations into the 5n+s+t-limb result,
   using the accumulated sign flags for the -1 and -2 points. */
219 mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);