3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
28 The idea of the algorithm used herein is to compute a smaller inverted value
29 than used in the standard Barrett algorithm, and thus save time in the
30 Newton iterations, and pay just a small price when using the inverted value
31 for developing quotient bits. This algorithm was presented at ICMS 2006.
37 1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is
38 probably close to optimal, except when mpn_mu_divappr_q fails.
40 An alternative which could be considered for much simpler code for the
41 complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
42 simply call mpn_mu_divappr_q. Such a temporary allocation is
43 unfortunately very large.
45 2. We used to fall back to mpn_mu_div_qr when we detect a possible
46 mpn_mu_divappr_q rounding problem, now we multiply and compare.
47 Unfortunately, since mpn_mu_divappr_q does not return the partial
48 remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could solve that.
51 3. The allocations done here should be made from the scratch area, which
52 then would need to be amended.
55 #include <stdlib.h> /* for NULL */
/* mpn_mu_div_q -- develop the qn = nn - dn quotient limbs of {np,nn} divided
   by {dp,dn} into {qp,qn}, Barrett-style: an approximate inverse of the high
   part of the divisor is built with mpn_invertappr, then the preinv division
   primitives consume it.  `scratch` is caller-supplied working space (sized
   by mpn_mu_div_q_itch).  NOTE(review): this extract is missing interleaved
   lines (return type, declarations, braces, the final return); comments below
   describe only the visible code — presumably qh, the high quotient limb, is
   the return value; confirm against the complete file.  */
61 mpn_mu_div_q (mp_ptr qp,
62 mp_srcptr np, mp_size_t nn,
63 mp_srcptr dp, mp_size_t dn,
66 mp_ptr tp, rp, ip, this_ip;
67 mp_size_t qn, in, this_in;
/* Temporary quotient area with one guard limb: the approximate quotient is
   developed at tp and, once validated/adjusted, copied to qp from tp + 1.  */
75 tp = TMP_BALLOC_LIMBS (qn + 1);
/* Case 1: quotient at least as long as the divisor.  The work is split into
   a preinv mu_div_qr pass for the high quotient limbs followed by a preinv
   mu_divappr_q pass for the low dn+1 limbs.  */
77 if (qn >= dn) /* nn >= 2*dn + 1 */
79 /* Find max inverse size needed by the two preinv calls. FIXME: This is
80 not optimal, it underestimates the invariance. */
85 in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
86 in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
/* (in = MAX (in1, in2) in the elided lines, presumably.)  */
91 in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
/* Approximate inverse of the top in+1 limbs of the divisor; one extra limb
   because mpn_invertappr wants an (in+1)-limb operand.  */
94 ip = TMP_BALLOC_LIMBS (in + 1);
/* Arm where the whole divisor fits (dn == in, per the elided condition):
   copy it above a low limb of scratch (the low limb is presumably seeded in
   an elided line — TODO confirm), invert, then drop the low inverse limb.  */
98 MPN_COPY (scratch + 1, dp, in);
100 mpn_invertappr (ip, scratch, in + 1, NULL);
101 MPN_COPY_INCR (ip, ip + 1, in);
/* Arm with a truncated divisor: round its top in+1 limbs up by one ulp so
   the inverse is a lower bound; a carry-out means the divisor's top limbs
   are all ones and the inverse is handled specially in elided lines.  */
105 cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
106 if (UNLIKELY (cy != 0))
110 mpn_invertappr (ip, scratch, in + 1, NULL);
111 MPN_COPY_INCR (ip, ip + 1, in);
115 /* |_______________________| dividend
116 |________| divisor */
/* Room for the partial remainder of the high division plus the low limbs of
   the numerator: 2*dn+1 limbs in all.  */
117 rp = TMP_BALLOC_LIMBS (2 * dn + 1);
/* High pass: true division of the top qn limbs of the (shifted) numerator,
   reusing the tail of the shared inverse that this call needs.  */
119 this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
120 this_ip = ip + in - this_in;
121 qh = mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
122 this_ip, this_in, scratch);
/* Bring down the numerator's low dn limbs under the partial remainder.  */
124 MPN_COPY (rp + 1, np, dn);
/* Low pass: approximate quotient of the 2*dn+1-limb partial number.  */
126 this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
127 this_ip = ip + in - this_in;
128 cy = mpn_preinv_mu_divappr_q (tp, rp, 2 * dn + 1, dp, dn,
129 this_ip, this_in, scratch);
/* Overflow from the approximate division cannot be a true quotient limb
   here, so clamp the approximate quotient to its maximum.  */
131 if (UNLIKELY (cy != 0))
133 /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
134 canonically reduced, replace the returned value of B^(qn-dn)+eps
135 by the largest possible value. */
137 for (i = 0; i < dn + 1; i++)
138 tp[i] = GMP_NUMB_MAX;
141 /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
142 greater than the max error, we cannot trust the quotient. */
/* Fast path (elided condition, presumably tp[0] > 4): the guard limb proves
   the quotient exact enough, so just drop it and copy out.  */
145 MPN_COPY (qp, tp + 1, qn);
/* Slow path: verify by multiplying quotient * divisor and comparing with
   the numerator; the approximation is at most one too large.  */
152 /* FIXME: can we use already allocated space? */
153 pp = TMP_BALLOC_LIMBS (nn);
154 mpn_mul (pp, tp + 1, qn, dp, dn);
/* Account for the high quotient limb qh in the product.  */
156 cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
158 if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
159 qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
160 else /* Same as above */
161 MPN_COPY (qp, tp + 1, qn);
/* Case 2: qn < dn.  Only the top qn+1 divisor limbs can influence the
   quotient, so divide a 2*qn+2-limb slice of the numerator by a truncated
   divisor with plain mpn_mu_divappr_q, then verify as above.  */
166 /* |_______________________| dividend
167 |________________| divisor */
169 /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
170 here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
171 the most significant dn-1 limbs will actually be read, but it is not
174 qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
175 dp + dn - (qn + 1), qn + 1, scratch);
177 /* The max error of mpn_mu_divappr_q is +4, but we get an additional
178 error from the divisor truncation. */
/* Fast path (elided guard-limb test): approximation provably exact, copy.  */
181 MPN_COPY (qp, tp + 1, qn);
/* Slow path: multiply back against the FULL divisor and compare, since the
   truncated division above may be one too large.  */
187 /* FIXME: a shorter product should be enough; we may use already
188 allocated space... */
189 rp = TMP_BALLOC_LIMBS (nn);
190 mpn_mul (rp, dp, dn, tp + 1, qn);
/* Fold the high quotient limb qh into the product.  */
192 cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
194 if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
195 qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
196 else /* Same as above */
197 MPN_COPY (qp, tp + 1, qn);
206 mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
208 mp_size_t qn, itch1, itch2;
213 itch1 = mpn_mu_div_qr_itch (qn, dn, mua_k);
214 itch2 = mpn_mu_divappr_q_itch (2 * dn + 1, dn, mua_k);
215 return MAX (itch1, itch2);
219 itch1 = mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);