1 /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
3 Contributed to the GNU project by Marco Bodrato.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2009, 2010 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
29 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */
30 #ifndef mpn_divexact_by3
31 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
32 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
34 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
38 /* Interpolation for Toom-3.5, using the evaluation points: infinity,
39 1, -1, 2, -2. More precisely, we want to compute
40 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
48 w0 = limit at infinity of f(x) / x^5,
50 The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
51 {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
52 {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
53 significant limbs small). f(-1) and f(-2) may be negative, signs
54 determined by the flag bits. All intermediate results are positive.
57 Interpolation sequence was taken from the paper: "Integer and
58 Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
59 Some slight variations were introduced: adaptation to "gmp
60 instruction set", and a final saving of an operation by interlacing
61 interpolation and recomposition phases.
65 mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
66 mp_ptr w4, mp_ptr w2, mp_ptr w1,
70 /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
71 mp_limb_t cy4, cy6, embankment;
74 ASSERT( 2*n >= w0n && w0n > 0 );
76 #define w5 pp /* 2n */
77 #define w3 (pp + 2 * n) /* 2n+1 */
78 #define w0 (pp + 5 * n) /* w0n */
80 /* Interpolate with sequence:
88 // Last steps are mixed with recomposition...
95 /* W2 =(W1 - W2)>>2 */
96 if (flags & toom6_vm2_neg)
97 mpn_add_n (w2, w1, w2, 2 * n + 1);
99 mpn_sub_n (w2, w1, w2, 2 * n + 1);
100 mpn_rshift (w2, w2, 2 * n + 1, 2);
102 /* W1 =(W1 - W5)>>1 */
103 w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
104 mpn_rshift (w1, w1, 2 * n + 1, 1);
106 /* W1 =(W1 - W2)>>1 */
107 #if HAVE_NATIVE_mpn_rsh1sub_n
108 mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
110 mpn_sub_n (w1, w1, w2, 2 * n + 1);
111 mpn_rshift (w1, w1, 2 * n + 1, 1);
114 /* W4 =(W3 - W4)>>1 */
115 if (flags & toom6_vm1_neg)
117 #if HAVE_NATIVE_mpn_rsh1add_n
118 mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
120 mpn_add_n (w4, w3, w4, 2 * n + 1);
121 mpn_rshift (w4, w4, 2 * n + 1, 1);
126 #if HAVE_NATIVE_mpn_rsh1sub_n
127 mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
129 mpn_sub_n (w4, w3, w4, 2 * n + 1);
130 mpn_rshift (w4, w4, 2 * n + 1, 1);
134 /* W2 =(W2 - W4)/3 */
135 mpn_sub_n (w2, w2, w4, 2 * n + 1);
136 mpn_divexact_by3 (w2, w2, 2 * n + 1);
138 /* W3 = W3 - W4 - W5 */
139 mpn_sub_n (w3, w3, w4, 2 * n + 1);
140 w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
142 /* W1 =(W1 - W3)/3 */
143 mpn_sub_n (w1, w1, w3, 2 * n + 1);
144 mpn_divexact_by3 (w1, w1, 2 * n + 1);
154 pp[] prior to operations:
155 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
157 summation scheme for remaining operations:
158 |______________5|n_____4|n_____3|n_____2|n______|n______|pp
159 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
164 |-H w0 |-L w0 ||-H w2 |-L w2 |
166 cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
167 MPN_INCR_U (pp + 3 * n + 1, n, cy);
170 #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n
171 #if HAVE_NATIVE_mpn_sublsh2_n
172 cy = mpn_sublsh2_n(w2, w2, w0, w0n);
174 cy = mpn_sublsh_n(w2, w2, w0, w0n, 2);
177 /* {W4,2*n+1} is now free and can be overwritten. */
178 cy = mpn_lshift(w4, w0, w0n, 2);
179 cy+= mpn_sub_n(w2, w2, w4, w0n);
181 MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
183 /* W4L = W4L - W2L */
184 cy = mpn_sub_n (pp + n, pp + n, w2, n);
185 MPN_DECR_U (w3, 2 * n + 1, cy);
187 /* W3H = W3H + W2L */
188 cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
190 cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
191 MPN_INCR_U (w1 + n, n + 1, cy);
194 if (LIKELY (w0n > n))
195 cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
197 cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
200 summation scheme for the next operation:
201 |...____5|n_____4|n_____3|n_____2|n______|n______|pp
202 |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
205 /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
206 cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
208 /* embankment is a "dirty trick" to avoid carry/borrow propagation
209 beyond allocated memory */
210 embankment = w0[w0n - 1] - 1;
212 if (LIKELY (w0n > n)) {
213 if ( LIKELY(cy4 > cy6) )
214 MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
216 MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
217 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
218 MPN_INCR_U (w0 + n, w0n - n, cy6);
220 MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
221 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
223 w0[w0n - 1] += embankment;