1 /* mpn_mod_1s_4p (ap, n, b, cps)
2 Divide (ap,,n) by b. Return the single-limb remainder.
3 Requires that b < B / 4.
5 Contributed to the GNU project by Torbjorn Granlund.
7 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
8 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
9 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11 Copyright 2008, 2009 Free Software Foundation, Inc.
13 This file is part of the GNU MP Library.
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of the GNU Lesser General Public License as published by
17 the Free Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
20 The GNU MP Library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23 License for more details.
25 You should have received a copy of the GNU Lesser General Public License
26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
33 mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
36 mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
39 ASSERT (b <= GMP_NUMB_MAX / 4);
41 count_leading_zeros (cnt, b);
46 B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
47 ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
48 udiv_rnd_preinv (B2modb, B1modb, b, bi);
49 udiv_rnd_preinv (B3modb, B2modb, b, bi);
50 udiv_rnd_preinv (B4modb, B3modb, b, bi);
51 udiv_rnd_preinv (B5modb, B4modb, b, bi);
55 cps[2] = B1modb >> cnt;
56 cps[3] = B2modb >> cnt;
57 cps[4] = B3modb >> cnt;
58 cps[5] = B4modb >> cnt;
59 cps[6] = B5modb >> cnt;
63 mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
65 mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
66 mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
76 umul_ppmm (ph, pl, ap[n - 3], B1modb);
77 add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
79 umul_ppmm (ch, cl, ap[n - 2], B2modb);
80 add_ssaaaa (ph, pl, ph, pl, ch, cl);
82 umul_ppmm (ch, cl, ap[n - 1], B3modb);
83 add_ssaaaa (rh, rl, ph, pl, ch, cl);
85 for (i = n - 8; i >= 0; i -= 4)
88 + ap[i+1] * (B mod b) <= (B-1)(b-1)
89 + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
90 + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
91 + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
92 + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
94 umul_ppmm (ph, pl, ap[i + 1], B1modb);
95 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
97 umul_ppmm (ch, cl, ap[i + 2], B2modb);
98 add_ssaaaa (ph, pl, ph, pl, ch, cl);
100 umul_ppmm (ch, cl, ap[i + 3], B3modb);
101 add_ssaaaa (ph, pl, ph, pl, ch, cl);
103 umul_ppmm (ch, cl, rl, B4modb);
104 add_ssaaaa (ph, pl, ph, pl, ch, cl);
106 umul_ppmm (rh, rl, rh, B5modb);
107 add_ssaaaa (rh, rl, rh, rl, ph, pl);
112 umul_ppmm (ph, pl, rl, B1modb);
113 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 3]);
114 umul_ppmm (rh, rl, rh, B2modb);
115 add_ssaaaa (rh, rl, rh, rl, ph, pl);
118 umul_ppmm (ph, pl, rl, B1modb);
119 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
120 umul_ppmm (rh, rl, rh, B2modb);
121 add_ssaaaa (rh, rl, rh, rl, ph, pl);
124 umul_ppmm (ph, pl, rl, B1modb);
125 add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
126 umul_ppmm (rh, rl, rh, B2modb);
127 add_ssaaaa (rh, rl, rh, rl, ph, pl);
136 umul_ppmm (rh, cl, rh, B1modb);
137 add_ssaaaa (rh, rl, rh, rl, 0, cl);
138 r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
140 udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
141 (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
142 ASSERT (q <= 4); /* optimize for small quotient? */
145 udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);