1 /* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
3 Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Library General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 MA 02111-1307, USA. */
22 /* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
24 Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
25 returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
27 Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the
28 low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
29 the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
33 2. usize * BITS_PER_MP_LIMB >= d.
34 3. If Q and U overlap, qp <= up.
36 Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
38 Funding for this work has been partially provided by Conselho Nacional
39 de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
40 301314194-2, and was done while I was a visiting researcher in the Instituto
41 de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
44 T. Jebelean, An algorithm for exact division, Journal of Symbolic
45 Computation, v. 15, 1993, pp. 169-180.
47 K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
48 Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
56 mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
57 mp_srcptr vp, mp_size_t vsize, unsigned long int d)
59 mpn_bdivmod (qp, up, usize, vp, vsize, d)
68 /* Cache for v_inv is used to make mpn_accelgcd faster. */
69 static mp_limb_t previous_low_vlimb = 0;
70 static mp_limb_t v_inv; /* 1/V mod 2^BITS_PER_MP_LIMB. */
72 if (vp[0] != previous_low_vlimb) /* Cache miss; compute v_inv. */
74 mp_limb_t v = previous_low_vlimb = vp[0];
75 mp_limb_t make_zero = 1;
80 while ((two_i & make_zero) == 0)
88 /* Need faster computation for some common cases in mpn_accelgcd. */
89 if (usize == 2 && vsize == 2 &&
90 (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB))
93 mp_limb_t q = up[0] * v_inv;
94 umul_ppmm (hi, lo, q, vp[0]);
95 up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q;
96 if (d == 2*BITS_PER_MP_LIMB)
97 q = up[1] * v_inv, up[1] = 0, qp[1] = q;
102 while (d >= BITS_PER_MP_LIMB)
104 mp_limb_t q = up[0] * v_inv;
105 mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
107 mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
108 d -= BITS_PER_MP_LIMB;
116 mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
120 case 1: b = mpn_sub_n (up, up, vp, MIN (usize, vsize)); break;
121 default: b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); break;
124 mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);