1 /* mpn_toom3_sqr -- Square {ap,an}.
3 Contributed to the GNU project by Torbjorn Granlund.
4 Additional improvements by Marco Bodrato.
6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
31 /* Evaluate in: -1, 0, +1, +2, +inf
38 v1 = (a0+ a1+ a2)^2 # A(1)^2 ah <= 2
39 vm1 = (a0- a1+ a2)^2 # A(-1)^2 |ah| <= 1
40 v2 = (a0+2a1+4a2)^2 # A(2)^2 ah <= 6
41 vinf= a2 ^2 # A(inf)^2
44 #if TUNE_PROGRAM_BUILD
45 #define MAYBE_sqr_basecase 1
46 #define MAYBE_sqr_toom3 1
48 #define MAYBE_sqr_basecase \
49 (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
50 #define MAYBE_sqr_toom3 \
51 (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
54 #define TOOM3_SQR_REC(p, a, n, ws) \
56 if (MAYBE_sqr_basecase \
57 && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
58 mpn_sqr_basecase (p, a, n); \
59 else if (! MAYBE_sqr_toom3 \
60 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
61 mpn_toom2_sqr (p, a, n, ws); \
63 mpn_toom3_sqr (p, a, n, ws); \
67 mpn_toom3_sqr (mp_ptr pp,
68 mp_srcptr ap, mp_size_t an,
74 mp_ptr as1, asm1, as2;
80 n = (an + 2) / (size_t) 3;
84 ASSERT (0 < s && s <= n);
86 as1 = scratch + 4 * n + 4;
87 asm1 = scratch + 2 * n + 2;
92 /* Compute as1 and asm1. */
93 cy = mpn_add (gp, a0, n, a2, s);
94 #if HAVE_NATIVE_mpn_add_n_sub_n
95 if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
97 cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
104 cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
105 as1[n] = cy + (cy2 >> 1);
106 asm1[n] = cy - (cy2 & 1);
109 as1[n] = cy + mpn_add_n (as1, gp, a1, n);
110 if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
112 mpn_sub_n (asm1, a1, gp, n);
117 cy -= mpn_sub_n (asm1, gp, a1, n);
123 #if HAVE_NATIVE_mpn_rsblsh1_n
124 cy = mpn_add_n (as2, a2, as1, s);
126 cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
128 cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
130 #if HAVE_NATIVE_mpn_addlsh1_n
131 cy = mpn_addlsh1_n (as2, a1, a2, s);
133 cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
134 cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
136 cy = mpn_add_n (as2, a2, as1, s);
138 cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
140 cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
141 cy -= mpn_sub_n (as2, as2, a0, n);
146 ASSERT (as1[n] <= 2);
147 ASSERT (asm1[n] <= 1);
149 #define v0 pp /* 2n */
150 #define v1 (pp + 2 * n) /* 2n+1 */
151 #define vinf (pp + 4 * n) /* s+s */
152 #define vm1 scratch /* 2n+1 */
153 #define v2 (scratch + 2 * n + 1) /* 2n+2 */
154 #define scratch_out (scratch + 5 * n + 5)
156 /* vm1, 2n+1 limbs */
157 #ifdef SMALLER_RECURSION
158 TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
161 cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
163 cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
166 TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
169 TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
171 TOOM3_SQR_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */
173 vinf0 = vinf[0]; /* v1 overlaps with this */
175 #ifdef SMALLER_RECURSION
177 TOOM3_SQR_REC (v1, as1, n, scratch_out);
180 cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
182 else if (as1[n] != 0)
184 #if HAVE_NATIVE_mpn_addlsh1_n
185 cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
187 cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
194 cy += mpn_add_n (v1 + n, v1 + n, as1, n);
196 else if (as1[n] != 0)
198 #if HAVE_NATIVE_mpn_addlsh1_n
199 cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
201 cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
207 TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
211 TOOM3_SQR_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */
213 mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);