contrib/gmp/mpn/generic/hgcd2.c

   1 /* hgcd2.c
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   6
   7 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
   8 Foundation, Inc.
   9
  10 This file is part of the GNU MP Library.
  11
  12 The GNU MP Library is free software; you can redistribute it and/or modify
  13 it under the terms of the GNU Lesser General Public License as published by
  14 the Free Software Foundation; either version 3 of the License, or (at your
  15 option) any later version.
  16
  17 The GNU MP Library is distributed in the hope that it will be useful, but
  18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  19 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  20 License for more details.
  21
  22 You should have received a copy of the GNU Lesser General Public License
  23 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  24
  25 #include "gmp.h"
  26 #include "gmp-impl.h"
  27 #include "longlong.h"
  28
  29 #if GMP_NAIL_BITS == 0
  30
  31 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
  32    the remainder. */
  33
  34 /* Single-limb division optimized for small quotients. */
  35 static inline mp_limb_t
  36 div1 (mp_ptr rp,
  37       mp_limb_t n0,
  38       mp_limb_t d0)
  39 {
  40   mp_limb_t q = 0;
  41
  42   if ((mp_limb_signed_t) n0 < 0)
  43     {
  44       int cnt;
  45       for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
  46         {
  47           d0 = d0 << 1;
  48         }
  49
  50       q = 0;
  51       while (cnt)
  52         {
  53           q <<= 1;
  54           if (n0 >= d0)
  55             {
  56               n0 = n0 - d0;
  57               q |= 1;
  58             }
  59           d0 = d0 >> 1;
  60           cnt--;
  61         }
  62     }
  63   else
  64     {
  65       int cnt;
  66       for (cnt = 0; n0 >= d0; cnt++)
  67         {
  68           d0 = d0 << 1;
  69         }
  70
  71       q = 0;
  72       while (cnt)
  73         {
  74           d0 = d0 >> 1;
  75           q <<= 1;
  76           if (n0 >= d0)
  77             {
  78               n0 = n0 - d0;
  79               q |= 1;
  80             }
  81           cnt--;
  82         }
  83     }
  84   *rp = n0;
  85   return q;
  86 }
  87
  88 /* Two-limb division optimized for small quotients.  */
  89 static inline mp_limb_t
  90 div2 (mp_ptr rp,
  91       mp_limb_t nh, mp_limb_t nl,
  92       mp_limb_t dh, mp_limb_t dl)
  93 {
  94   mp_limb_t q = 0;
  95
  96   if ((mp_limb_signed_t) nh < 0)
  97     {
  98       int cnt;
  99       for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
 100         {
 101           dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
 102           dl = dl << 1;
 103         }
 104
 105       while (cnt)
 106         {
 107           q <<= 1;
 108           if (nh > dh || (nh == dh && nl >= dl))
 109             {
 110               sub_ddmmss (nh, nl, nh, nl, dh, dl);
 111               q |= 1;
 112             }
 113           dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
 114           dh = dh >> 1;
 115           cnt--;
 116         }
 117     }
 118   else
 119     {
 120       int cnt;
 121       for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
 122         {
 123           dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
 124           dl = dl << 1;
 125         }
 126
 127       while (cnt)
 128         {
 129           dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
 130           dh = dh >> 1;
 131           q <<= 1;
 132           if (nh > dh || (nh == dh && nl >= dl))
 133             {
 134               sub_ddmmss (nh, nl, nh, nl, dh, dl);
 135               q |= 1;
 136             }
 137           cnt--;
 138         }
 139     }
 140
 141   rp[0] = nl;
 142   rp[1] = nh;
 143
 144   return q;
 145 }
 146
 147 #if 0
 148 /* This div2 uses less branches, but it seems to nevertheless be
 149    slightly slower than the above code. */
 150 static inline mp_limb_t
 151 div2 (mp_ptr rp,
 152       mp_limb_t nh, mp_limb_t nl,
 153       mp_limb_t dh, mp_limb_t dl)
 154 {
 155   mp_limb_t q = 0;
 156   int ncnt;
 157   int dcnt;
 158
 159   count_leading_zeros (ncnt, nh);
 160   count_leading_zeros (dcnt, dh);
 161   dcnt -= ncnt;
 162
 163   dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
 164   dl <<= dcnt;
 165
 166   do
 167     {
 168       mp_limb_t bit;
 169       q <<= 1;
 170       if (UNLIKELY (nh == dh))
 171         bit = (nl >= dl);
 172       else
 173         bit = (nh > dh);
 174
 175       q |= bit;
 176
 177       sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);
 178
 179       dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
 180       dh = dh >> 1;
 181     }
 182   while (dcnt--);
 183
 184   rp[0] = nl;
 185   rp[1] = nh;
 186
 187   return q;
 188 }
 189 #endif
 190
 191 #else /* GMP_NAIL_BITS != 0 */
 192 /* Check all functions for nail support. */
 193 /* hgcd2 should be defined to take inputs including nail bits, and
 194    produce a matrix with elements also including nail bits. This is
 195    necessary, for the matrix elements to be useful with mpn_mul_1,
 196    mpn_addmul_1 and friends. */
 197 #error Not implemented
 198 #endif /* GMP_NAIL_BITS != 0 */
 199
 200 /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
 201    matrix M. Returns 1 if we make progress, i.e. can perform at least
 202    one subtraction. Otherwise returns zero.. */
 203
 204 /* FIXME: Possible optimizations:
 205
 206    The div2 function starts with checking the most significant bit of
 207    the numerator. We can maintained normalized operands here, call
 208    hgcd with normalized operands only, which should make the code
 209    simpler and possibly faster.
 210
 211    Experiment with table lookups on the most significant bits.
 212
 213    This function is also a candidate for assembler implementation.
 214 */
 215 int
 216 mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
 217            struct hgcd_matrix1 *M)
 218 {
 219   mp_limb_t u00, u01, u10, u11;
 220
 221   if (ah < 2 || bh < 2)
 222     return 0;
 223
 224   if (ah > bh || (ah == bh && al > bl))
 225     {
 226       sub_ddmmss (ah, al, ah, al, bh, bl);
 227       if (ah < 2)
 228         return 0;
 229
 230       u00 = u01 = u11 = 1;
 231       u10 = 0;
 232     }
 233   else
 234     {
 235       sub_ddmmss (bh, bl, bh, bl, ah, al);
 236       if (bh < 2)
 237         return 0;
 238
 239       u00 = u10 = u11 = 1;
 240       u01 = 0;
 241     }
 242
 243   if (ah < bh)
 244     goto subtract_a;
 245
 246   for (;;)
 247     {
 248       ASSERT (ah >= bh);
 249       if (ah == bh)
 250         goto done;
 251
 252       if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
 253         {
 254           ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
 255           bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
 256
 257           break;
 258         }
 259
 260       /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
 261          1), affecting the second column of M. */
 262       ASSERT (ah > bh);
 263       sub_ddmmss (ah, al, ah, al, bh, bl);
 264
 265       if (ah < 2)
 266         goto done;
 267
 268       if (ah <= bh)
 269         {
 270           /* Use q = 1 */
 271           u01 += u00;
 272           u11 += u10;
 273         }
 274       else
 275         {
 276           mp_limb_t r[2];
 277           mp_limb_t q = div2 (r, ah, al, bh, bl);
 278           al = r[0]; ah = r[1];
 279           if (ah < 2)
 280             {
 281               /* A is too small, but q is correct. */
 282               u01 += q * u00;
 283               u11 += q * u10;
 284               goto done;
 285             }
 286           q++;
 287           u01 += q * u00;
 288           u11 += q * u10;
 289         }
 290     subtract_a:
 291       ASSERT (bh >= ah);
 292       if (ah == bh)
 293         goto done;
 294
 295       if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
 296         {
 297           ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
 298           bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
 299
 300           goto subtract_a1;
 301         }
 302
 303       /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
 304          1), affecting the first column of M. */
 305       sub_ddmmss (bh, bl, bh, bl, ah, al);
 306
 307       if (bh < 2)
 308         goto done;
 309
 310       if (bh <= ah)
 311         {
 312           /* Use q = 1 */
 313           u00 += u01;
 314           u10 += u11;
 315         }
 316       else
 317         {
 318           mp_limb_t r[2];
 319           mp_limb_t q = div2 (r, bh, bl, ah, al);
 320           bl = r[0]; bh = r[1];
 321           if (bh < 2)
 322             {
 323               /* B is too small, but q is correct. */
 324               u00 += q * u01;
 325               u10 += q * u11;
 326               goto done;
 327             }
 328           q++;
 329           u00 += q * u01;
 330           u10 += q * u11;
 331         }
 332     }
 333
 334   /* NOTE: Since we discard the least significant half limb, we don't
 335      get a truly maximal M (corresponding to |a - b| <
 336      2^{GMP_LIMB_BITS +1}). */
 337   /* Single precision loop */
 338   for (;;)
 339     {
 340       ASSERT (ah >= bh);
 341       if (ah == bh)
 342         break;
 343
 344       ah -= bh;
 345       if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
 346         break;
 347
 348       if (ah <= bh)
 349         {
 350           /* Use q = 1 */
 351           u01 += u00;
 352           u11 += u10;
 353         }
 354       else
 355         {
 356           mp_limb_t r;
 357           mp_limb_t q = div1 (&r, ah, bh);
 358           ah = r;
 359           if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
 360             {
 361               /* A is too small, but q is correct. */
 362               u01 += q * u00;
 363               u11 += q * u10;
 364               break;
 365             }
 366           q++;
 367           u01 += q * u00;
 368           u11 += q * u10;
 369         }
 370     subtract_a1:
 371       ASSERT (bh >= ah);
 372       if (ah == bh)
 373         break;
 374
 375       bh -= ah;
 376       if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
 377         break;
 378
 379       if (bh <= ah)
 380         {
 381           /* Use q = 1 */
 382           u00 += u01;
 383           u10 += u11;
 384         }
 385       else
 386         {
 387           mp_limb_t r;
 388           mp_limb_t q = div1 (&r, bh, ah);
 389           bh = r;
 390           if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
 391             {
 392               /* B is too small, but q is correct. */
 393               u00 += q * u01;
 394               u10 += q * u11;
 395               break;
 396             }
 397           q++;
 398           u00 += q * u01;
 399           u10 += q * u11;
 400         }
 401     }
 402
 403  done:
 404   M->u[0][0] = u00; M->u[0][1] = u01;
 405   M->u[1][0] = u10; M->u[1][1] = u11;
 406
 407   return 1;
 408 }
 409
 410 /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
 411  * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
 412 mp_size_t
 413 mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
 414                              mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
 415 {
 416   mp_limb_t ah, bh;
 417
 418   /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
 419
 420      r  = u00 * a
 421      r += u10 * b
 422      b *= u11
 423      b += u01 * a
 424   */
 425
 426 #if HAVE_NATIVE_mpn_addaddmul_1msb0
 427   ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
 428   bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
 429 #else
 430   ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
 431   ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
 432
 433   bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
 434   bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
 435 #endif
 436   rp[n] = ah;
 437   bp[n] = bh;
 438
 439   n += (ah | bh) > 0;
 440   return n;
 441 }
 442
 443 /* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
 444    the left. Uses three buffers, to avoid a copy. */
 445 mp_size_t
 446 mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
 447                                      mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
 448 {
 449   mp_limb_t h0, h1;
 450
 451   /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
 452
 453      r  = u11 * a
 454      r -= u01 * b
 455      b *= u00
 456      b -= u10 * a
 457   */
 458
 459   h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
 460   h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
 461   ASSERT (h0 == h1);
 462
 463   h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
 464   h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
 465   ASSERT (h0 == h1);
 466
 467   n -= (rp[n-1] | bp[n-1]) == 0;
 468   return n;
 469 }