contrib/libgmp/mpn/generic/bdivmod.c

   1 /* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
   2
   3 Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU Library General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or (at your
  10 option) any later version.
  11
  12 The GNU MP Library is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  15 License for more details.
  16
  17 You should have received a copy of the GNU Library General Public License
  18 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  20 MA 02111-1307, USA. */
  21
  22 /* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
  23
  24    Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
  25    returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
  26
  27    Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up.  Since the
  28    low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
  29    the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
  30
  31    Preconditions:
  32    1.  V is odd.
  33    2.  usize * BITS_PER_MP_LIMB >= d.
  34    3.  If Q and U overlap, qp <= up.
  35
  36    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
  37
  38    Funding for this work has been partially provided by Conselho Nacional
  39    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
  40    301314194-2, and was done while I was a visiting researcher in the Instituto
  41    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
  42
  43    References:
  44        T. Jebelean, An algorithm for exact division, Journal of Symbolic
  45        Computation, v. 15, 1993, pp. 169-180.
  46
  47        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
  48        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
  49
  50 #include "gmp.h"
  51 #include "gmp-impl.h"
  52 #include "longlong.h"
  53
  54 mp_limb_t
  55 #if __STDC__
  56 mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
  57              mp_srcptr vp, mp_size_t vsize, unsigned long int d)
  58 #else
  59 mpn_bdivmod (qp, up, usize, vp, vsize, d)
  60      mp_ptr qp;
  61      mp_ptr up;
  62      mp_size_t usize;
  63      mp_srcptr vp;
  64      mp_size_t vsize;
  65      unsigned long int d;
  66 #endif
  67 {
  68   /* Cache for v_inv is used to make mpn_accelgcd faster.  */
  69   static mp_limb_t previous_low_vlimb = 0;
  70   static mp_limb_t v_inv;               /* 1/V mod 2^BITS_PER_MP_LIMB.  */
  71
  72   if (vp[0] != previous_low_vlimb)      /* Cache miss; compute v_inv.  */
  73     {
  74       mp_limb_t v = previous_low_vlimb = vp[0];
  75       mp_limb_t make_zero = 1;
  76       mp_limb_t two_i = 1;
  77       v_inv = 0;
  78       do
  79         {
  80           while ((two_i & make_zero) == 0)
  81             two_i <<= 1, v <<= 1;
  82           v_inv += two_i;
  83           make_zero -= v;
  84         }
  85       while (make_zero);
  86     }
  87
  88   /* Need faster computation for some common cases in mpn_accelgcd.  */
  89   if (usize == 2 && vsize == 2 &&
  90       (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB))
  91     {
  92       mp_limb_t hi, lo;
  93       mp_limb_t q = up[0] * v_inv;
  94       umul_ppmm (hi, lo, q, vp[0]);
  95       up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q;
  96       if (d == 2*BITS_PER_MP_LIMB)
  97         q = up[1] * v_inv, up[1] = 0, qp[1] = q;
  98       return 0;
  99     }
 100
 101   /* Main loop.  */
 102   while (d >= BITS_PER_MP_LIMB)
 103     {
 104       mp_limb_t q = up[0] * v_inv;
 105       mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
 106       if (usize > vsize)
 107         mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
 108       d -= BITS_PER_MP_LIMB;
 109       up += 1, usize -= 1;
 110       *qp++ = q;
 111     }
 112
 113   if (d)
 114     {
 115       mp_limb_t b;
 116       mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
 117       switch (q)
 118         {
 119           case 0:  return 0;
 120           case 1:  b = mpn_sub_n (up, up, vp, MIN (usize, vsize));   break;
 121           default: b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); break;
 122         }
 123       if (usize > vsize)
 124         mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
 125       return q;
 126     }
 127
 128   return 0;
 129 }