1 /* mpn_div_q -- division for arbitrary size operands.
3 Contributed to the GNU project by Torbjorn Granlund.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 Copyright 2009, 2010 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
31 /* Compute Q = N/D with truncation.
35 T = {scratch,nn+1} is scratch space
36 N and D are both untouched by the computation.
37 N and T may overlap; pass the same space if N is irrelevant after the call,
38 but note that tp needs an extra limb.
43 No overlap between the N, D, and Q areas.
45 This division function does not clobber its input operands, since it is
46 intended to support average-O(qn) division, and for that to be effective, it
47 cannot put requirements on callers to copy an O(nn) operand.
49 If a caller does not care about the value of {np,nn+1} after calling this
50 function, it should pass np also for the scratch argument. This function
51 will then save some time and space by avoiding allocation and copying.
52 (FIXME: Is this a good design? We only really save any copying for
53 already-normalised divisors, which should be rare. It also prevents us from
54 reasonably asking for all scratch space we need.)
56 We write nn-dn+1 limbs for the quotient, but return void. Why not return
57 the most significant quotient limb? Look at the 4 main code blocks below
58 (consisting of an outer if-else where each arm contains an if-else). It is
59 tricky for the first code block, since the mpn_*_div_q calls will typically
60 generate all nn-dn+1 and return 0 or 1. I don't see how to fix that unless
61 we generate the most significant quotient limb here, before calling
62 mpn_*_div_q, or put the quotient in a temporary area. Since this is a
63 critical division case (the SB sub-case in particular) copying is not a good idea.
66 It might make sense to split the if-else parts of the (qn + FUDGE
67 >= dn) blocks into separate functions, since we could promise quite
68 different things to callers in these two cases. The 'then' case
69 benefits from np=scratch, and it could perhaps even tolerate qp=np,
70 saving some headache for many callers.
72 FIXME: Scratch allocation leaves a lot to be desired. E.g., for the MU size
73 operands, we do not reuse the huge scratch for adjustments. This can be a
74 serious waste of memory for the largest operands.
77 /* FUDGE determines when to try getting an approximate quotient from the upper
78 parts of the dividend and divisor, then adjust. N.B. FUDGE must be >= 2
79 for the code to be correct. */
80 #define FUDGE 5 /* FIXME: tune this */
/* Exact division reuses the thresholds tuned for the corresponding
   approximate-quotient (divappr) functions: the exact functions follow the
   same schoolbook -> divide-and-conquer -> Mulders/Barrett progression, so
   the crossover points are expected to be close.  */
82 #define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD
83 #define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD
84 #define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD
85 #ifndef MUPI_DIVAPPR_Q_THRESHOLD
86 #define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD
91 mp_srcptr np, mp_size_t nn,
92 mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
94 mp_ptr new_dp, new_np, tp, rp;
104 ASSERT (dp[dn - 1] != 0);
105 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
106 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
107 ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
109 ASSERT_ALWAYS (FUDGE >= 2);
113 mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
117 qn = nn - dn + 1; /* Quotient size, high limb might be zero */
119 if (qn + FUDGE >= dn)
121 /* |________________________|
126 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
128 count_leading_zeros (cnt, dh);
130 cy = mpn_lshift (new_np, np, nn, cnt);
132 new_nn = nn + (cy != 0);
134 new_dp = TMP_ALLOC_LIMBS (dn);
135 mpn_lshift (new_dp, dp, dn, cnt);
139 qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
141 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
142 BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
144 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
145 qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
147 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
148 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
149 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
150 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
152 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
153 qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
157 mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
158 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
159 qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
163 else if (UNLIKELY (qh != 0))
165 /* This happens only when the quotient is close to B^n and
166 mpn_*_divappr_q returned B^n. */
169 for (i = 0; i < n; i++)
170 qp[i] = GMP_NUMB_MAX;
171 qh = 0; /* currently ignored */
174 else /* divisor is already normalised */
177 MPN_COPY (new_np, np, nn);
181 qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
183 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
184 BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
186 invert_pi1 (dinv, dh, dp[dn - 2]);
187 qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
189 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
190 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
191 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
192 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
194 invert_pi1 (dinv, dh, dp[dn - 2]);
195 qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
199 mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
200 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
201 qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
208 /* |________________________|
209 |_________________| */
210 tp = TMP_ALLOC_LIMBS (qn + 1);
215 /* We need {np,nn} to remain untouched until the final adjustment, so
216 we need to allocate separate space for new_np. */
217 new_np = TMP_ALLOC_LIMBS (new_nn + 1);
221 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
223 count_leading_zeros (cnt, dh);
225 cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
230 new_dp = TMP_ALLOC_LIMBS (qn + 1);
231 mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
232 new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
236 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
238 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
240 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
241 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
243 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
245 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
246 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
250 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
251 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
252 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
256 else if (UNLIKELY (qh != 0))
258 /* This happens only when the quotient is close to B^n and
259 mpn_*_divappr_q returned B^n. */
261 n = new_nn - (qn + 1);
262 for (i = 0; i < n; i++)
263 tp[i] = GMP_NUMB_MAX;
264 qh = 0; /* currently ignored */
267 else /* divisor is already normalised */
269 MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
271 new_dp = (mp_ptr) dp + dn - (qn + 1);
275 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
277 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
279 invert_pi1 (dinv, dh, new_dp[qn - 1]);
280 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
282 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
284 invert_pi1 (dinv, dh, new_dp[qn - 1]);
285 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
289 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
290 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
291 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
296 MPN_COPY (qp, tp + 1, qn);
301 rp = TMP_ALLOC_LIMBS (dn + qn);
302 mpn_mul (rp, dp, dn, tp + 1, qn);
304 rn -= rp[rn - 1] == 0;
306 if (rn > nn || mpn_cmp (np, rp, nn) < 0)