3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
7 Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
28 #define MUL(rp, ap, an, bp, bn) do { \
30 mpn_mul (rp, ap, an, bp, bn); \
32 mpn_mul (rp, bp, bn, ap, an); \
35 /* Inputs are unsigned. */
37 abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
40 MPN_CMP (c, ap, bp, n);
43 mpn_sub_n (rp, ap, bp, n);
48 mpn_sub_n (rp, bp, ap, n);
54 add_signed_n (mp_ptr rp,
55 mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
58 return as ^ abs_sub_n (rp, ap, bp, n);
61 ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
67 mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
69 if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
70 || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
73 return 4*(rn + mn) + 5;
78 / s0 \ / 1 0 0 0 \ / r0 \
79 | s1 | | 0 1 0 0 | | r1 |
80 | s2 | | 0 0 1 1 | | r2 |
81 | s3 | = | -1 0 1 1 | \ r3 /
86 / t0 \ / 1 0 0 0 \ / m0 \
87 | t1 | | 0 0 1 0 | | m1 |
88 | t2 | | -1 1 0 0 | | m2 |
89 | t3 | = | 1 -1 0 1 | \ m3 /
94 / r0 \ / 1 1 0 0 0 0 0 \ / s0 * t0 \
95 | r1 | = | 1 0 1 1 0 1 0 | | s1 * t1 |
96 | r2 | | 1 0 0 1 1 0 1 | | s2 * t2 |
97 \ r3 / \ 1 0 1 1 1 0 0 / | s3 * t3 |
103 /* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
105 * Resulting elements are of size up to rn + mn + 1.
107 * Temporary storage: 4 rn + 4 mn + 5. */
109 mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
110 mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
113 mp_ptr s2, s3, t2, t3, u0, u1;
114 int r2s, r3s, s3s, t2s, t3s, u0s, u1s;
116 s3 = tp; tp += rn + 1;
118 t3 = tp; tp += mn + 1;
119 u0 = tp; tp += rn + mn + 1;
120 u1 = tp; /* rn + mn + 2 */
122 MUL (u0, r0, rn, m0, mn); /* 0 */
123 MUL (u1, r1, rn, m2, mn); /* 1 */
125 MPN_COPY (s2, r3, rn);
127 r3[rn] = mpn_add_n (r3, r3, r2, rn);
129 s3s = abs_sub_n (s3, r3, r0, rn + 1);
130 t2s = abs_sub_n (t2, m1, m0, mn);
133 t3[mn] = mpn_add_n (t3, m3, t2, mn);
138 t3s = abs_sub_n (t3, m3, t2, mn);
142 r2s = abs_sub_n (r2, r0, r2, rn);
143 r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
145 MUL(u1, s3, rn+1, t3, mn+1); /* 3 */
147 ASSERT (u1[rn+mn+1] == 0);
148 ASSERT (u1[rn+mn] < 4);
153 u0s = abs_sub_n (u0, u0, u1, rn + mn + 1);
157 u0[rn+mn] = u1[rn+mn] + mpn_add_n (u0, u0, u1, rn + mn);
160 MUL(u1, r3, rn + 1, t2, mn); /* 2 */
162 ASSERT (u1[rn+mn] < 2);
164 u1s = add_signed_n (u1, u0, u0s, u1, u1s, rn + mn + 1);
166 t2s = abs_sub_n (t2, m3, m1, mn);
169 s3[rn] += mpn_add_n (s3, s3, r1, rn);
174 s3[rn] -= mpn_sub_n (s3, s3, r1, rn);
179 s3s = abs_sub_n (s3, r1, s3, rn);
181 MUL (r1, s3, rn+1, m3, mn); /* 5 */
182 ASSERT_NOCARRY(add_signed_n (r1, r1, s3s, u1, u1s, rn + mn + 1));
183 ASSERT (r1[rn + mn] < 2);
185 MUL (r3, r2, rn, t2, mn); /* 4 */
188 u0s = add_signed_n (u0, u0, u0s, r3, r3s, rn + mn + 1);
189 ASSERT_NOCARRY (add_signed_n (r3, r3, r3s, u1, u1s, rn + mn + 1));
190 ASSERT (r3[rn + mn] < 2);
194 t3[mn] += mpn_add_n (t3, m2, t3, mn);
199 t3[mn] -= mpn_sub_n (t3, t3, m2, mn);
204 t3s = abs_sub_n (t3, m2, t3, mn);
206 MUL (r2, s2, rn, t3, mn + 1); /* 6 */
208 ASSERT_NOCARRY (add_signed_n (r2, r2, t3s, u0, u0s, rn + mn + 1));
209 ASSERT (r2[rn + mn] < 2);
213 mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
214 mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
217 if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
218 || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
223 /* Temporary storage: 3 rn + 2 mn */
227 for (i = 0; i < 2; i++)
229 MPN_COPY (tp, r0, rn);
233 mpn_mul (p0, r0, rn, m0, mn);
234 mpn_mul (p1, r1, rn, m3, mn);
235 mpn_mul (r0, r1, rn, m2, mn);
236 mpn_mul (r1, tp, rn, m1, mn);
240 mpn_mul (p0, m0, mn, r0, rn);
241 mpn_mul (p1, m3, mn, r1, rn);
242 mpn_mul (r0, m2, mn, r1, rn);
243 mpn_mul (r1, m1, mn, tp, rn);
245 r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
246 r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);
252 mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
253 m0, m1, m2, m3, mn, tp);