1 /* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
3 Contributed to the GNU project by Niels Möller.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
29 /* Arithmetic right shift, requiring that the shifted out bits are zero. */
31 divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift)
34 sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift);
35 ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift));
36 rp[n-1] |= sign & GMP_NUMB_MASK;
39 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */
40 #ifndef mpn_divexact_by3
41 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
43 #ifndef mpn_divexact_by9
44 #define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
46 #ifndef mpn_divexact_by15
47 #define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
50 /* Interpolation for toom4, using the evaluation points infinity, 2,
51 1, -1, 1/2, -1/2. More precisely, we want to compute
52 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
61 w6 = limit at infinity of f(x) / x^6,
63 The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
64 w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
65 w6n }. The other values are 2n + 1 limbs each (with most
66 significant limbs small). f(-1) and f(-1/2) may be negative, signs
67 determined by the flag bits. All intermediate results are
68 represented in two's complement. Inputs are destroyed.
70 Needs (2*n + 1) limbs of temporary storage.
74 mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags,
75 mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
76 mp_size_t w6n, mp_ptr tp)
78 mp_size_t m = 2*n + 1;
86 /* Using Marco Bodrato's formulas
107 where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1),
108 W4 = f(1), W5 = f(2), W6 = f(oo),
111 mpn_add_n (w5, w5, w2, m);
112 if (flags & toom4_w3_neg)
113 mpn_add_n (w3, w3, w4, m);
115 mpn_sub_n (w3, w4, w3, m);
116 divexact_2exp (w3, w3, m, 1);
117 if (flags & toom4_w1_neg)
118 mpn_add_n (w1, w1, w2, m);
120 mpn_sub_n (w1, w2, w1, m);
121 mpn_sub (w2, w2, m, w6, w6n);
122 tp[2*n] = mpn_lshift (tp, rp, 2*n, 6);
123 mpn_sub_n (w2, w2, tp, m);
124 mpn_lshift (w2, w2, m, 1);
125 mpn_sub_n (w2, w2, w1, m);
126 divexact_2exp (w2, w2, m, 3);
127 mpn_sub_n (w4, w4, w3, m);
129 mpn_submul_1 (w5, w4, m, 65);
130 mpn_sub (w4, w4, m, w6, w6n);
131 mpn_sub (w4, w4, m, rp, 2*n);
132 mpn_addmul_1 (w5, w4, m, 45);
133 mpn_sub_n (w2, w2, w4, m);
134 /* Rely on divexact working with two's complement */
135 mpn_divexact_by3 (w2, w2, m);
136 mpn_sub_n (w4, w4, w2, m);
138 mpn_sub_n (w1, w1, w5, m);
139 mpn_lshift (tp, w3, m, 4);
140 mpn_sub_n (w5, w5, tp, m);
141 divexact_2exp (w5, w5, m, 1);
142 mpn_divexact_by9 (w5, w5, m);
143 mpn_sub_n (w3, w3, w5, m);
144 divexact_2exp (w1, w1, m, 1);
145 mpn_divexact_by15 (w1, w1, m);
146 mpn_add_n (w1, w1, w5, m);
147 divexact_2exp (w1, w1, m, 1);
148 mpn_sub_n (w5, w5, w1, m);
150 /* Two's complement coefficients must be non-negative at the end of
152 ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT));
153 ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT));
154 ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT));
155 ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT));
156 ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT));
158 /* Addition chain. Note carries and the 2n'th limbs that need to be
161 * Special care is needed for w2[2n] and the corresponding carry,
162 * since the "simple" way of adding it all together would overwrite
163 * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
164 * the high half of w3 and the low half of w4.
170 * ||w5 (2n+1)| ||w1 (2n+1)|
171 * + | w6 (w6n)| ||w2 (2n+1)| w0 (2n) | (share storage with r)
172 * -----------------------------------------------
173 * r | | | | | | | | |
174 * c7 c6 c5 c4 c3 Carries to propagate
177 cy = mpn_add_n (rp + n, rp + n, w1, 2*n);
178 MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy);
179 cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
180 MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
181 cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
182 MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
183 cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
184 MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
187 mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
188 MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7);
192 ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
196 for (i = w6n; i <= n; i++)
197 ASSERT (w5[n + i] == 0);