contrib/gmp/mpn/generic/toom_interpolate_7pts.c

   1 /* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
   2
   3    Contributed to the GNU project by Niels Möller.
   4
   5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   8
   9 Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
  10
  11 This file is part of the GNU MP Library.
  12
  13 The GNU MP Library is free software; you can redistribute it and/or modify
  14 it under the terms of the GNU Lesser General Public License as published by
  15 the Free Software Foundation; either version 3 of the License, or (at your
  16 option) any later version.
  17
  18 The GNU MP Library is distributed in the hope that it will be useful, but
  19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  21 License for more details.
  22
  23 You should have received a copy of the GNU Lesser General Public License
  24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  25
  26 #include "gmp.h"
  27 #include "gmp-impl.h"
  28
  29 /* Arithmetic right shift, requiring that the shifted out bits are zero. */
  30 static inline void
  31 divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift)
  32 {
  33   mp_limb_t sign;
  34   sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift);
  35   ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift));
  36   rp[n-1] |= sign & GMP_NUMB_MASK;
  37 }
  38
  39 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */
  40 #ifndef mpn_divexact_by3
  41 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
  42 #endif
  43 #ifndef mpn_divexact_by9
  44 #define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
  45 #endif
  46 #ifndef mpn_divexact_by15
  47 #define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
  48 #endif
  49
  50 /* Interpolation for toom4, using the evaluation points infinity, 2,
  51    1, -1, 1/2, -1/2. More precisely, we want to compute
  52    f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
  53    seven values
  54
  55      w0 = f(0),
  56      w1 = 64 f(-1/2),
  57      w2 = 64 f(1/2),
  58      w3 = f(-1),
  59      w4 = f(1)
  60      w5 = f(2)
  61      w6 = limit at infinity of f(x) / x^6,
  62
  63    The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
  64    w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
  65    w6n }. The other values are 2n + 1 limbs each (with most
  66    significant limbs small). f(-1) and f(-1/2) may be negative, signs
  67    determined by the flag bits. All intermediate results are
  68    represented in two's complement. Inputs are destroyed.
  69
  70    Needs (2*n + 1) limbs of temporary storage.
  71 */
  72
  73 void
  74 mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags,
  75                            mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
  76                            mp_size_t w6n, mp_ptr tp)
  77 {
  78   mp_size_t m = 2*n + 1;
  79   mp_ptr w2 = rp + 2*n;
  80   mp_ptr w6 = rp + 6*n;
  81   mp_limb_t cy;
  82
  83   ASSERT (w6n > 0);
  84   ASSERT (w6n <= 2*n);
  85
  86   /* Using Marco Bodrato's formulas
  87
  88      W5 = W5 + W2
  89      W3 =(W3 + W4)/2
  90      W1 = W1 + W2
  91      W2 = W2 - W6 - W0*64
  92      W2 =(W2*2 - W1)/8
  93      W4 = W4 - W3
  94
  95      W5 = W5 - W4*65
  96      W4 = W4 - W6 - W0
  97      W5 = W5 + W4*45
  98      W2 =(W2 - W4)/3
  99      W4 = W4 - W2
 100
 101      W1 = W1 - W5
 102      W5 =(W5 - W3*16)/ 18
 103      W3 = W3 - W5
 104      W1 =(W1/30 + W5)/ 2
 105      W5 = W5 - W1
 106
 107      where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1),
 108            W4 = f(1), W5 = f(2), W6 = f(oo),
 109   */
 110
 111   mpn_add_n (w5, w5, w2, m);
 112   if (flags & toom4_w3_neg)
 113     mpn_add_n (w3, w3, w4, m);
 114   else
 115     mpn_sub_n (w3, w4, w3, m);
 116   divexact_2exp (w3, w3, m, 1);
 117   if (flags & toom4_w1_neg)
 118     mpn_add_n (w1, w1, w2, m);
 119   else
 120     mpn_sub_n (w1, w2, w1, m);
 121   mpn_sub (w2, w2, m, w6, w6n);
 122   tp[2*n] = mpn_lshift (tp, rp, 2*n, 6);
 123   mpn_sub_n (w2, w2, tp, m);
 124   mpn_lshift (w2, w2, m, 1);
 125   mpn_sub_n (w2, w2, w1, m);
 126   divexact_2exp (w2, w2, m, 3);
 127   mpn_sub_n (w4, w4, w3, m);
 128
 129   mpn_submul_1 (w5, w4, m, 65);
 130   mpn_sub (w4, w4, m, w6, w6n);
 131   mpn_sub (w4, w4, m, rp, 2*n);
 132   mpn_addmul_1 (w5, w4, m, 45);
 133   mpn_sub_n (w2, w2, w4, m);
 134   /* Rely on divexact working with two's complement */
 135   mpn_divexact_by3 (w2, w2, m);
 136   mpn_sub_n (w4, w4, w2, m);
 137
 138   mpn_sub_n (w1, w1, w5, m);
 139   mpn_lshift (tp, w3, m, 4);
 140   mpn_sub_n (w5, w5, tp, m);
 141   divexact_2exp (w5, w5, m, 1);
 142   mpn_divexact_by9 (w5, w5, m);
 143   mpn_sub_n (w3, w3, w5, m);
 144   divexact_2exp (w1, w1, m, 1);
 145   mpn_divexact_by15 (w1, w1, m);
 146   mpn_add_n (w1, w1, w5, m);
 147   divexact_2exp (w1, w1, m, 1);
 148   mpn_sub_n (w5, w5, w1, m);
 149
 150   /* Two's complement coefficients must be non-negative at the end of
 151      this procedure. */
 152   ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT));
 153   ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT));
 154   ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT));
 155   ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT));
 156   ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT));
 157
 158   /* Addition chain. Note carries and the 2n'th limbs that need to be
 159    * added in.
 160    *
 161    * Special care is needed for w2[2n] and the corresponding carry,
 162    * since the "simple" way of adding it all together would overwrite
 163    * the limb at wp[2*n] and rp[4*n] (same location) with the sum of
 164    * the high half of w3 and the low half of w4.
 165    *
 166    *         7    6    5    4    3    2    1    0
 167    *    |    |    |    |    |    |    |    |    |
 168    *                  ||w3 (2n+1)|
 169    *             ||w4 (2n+1)|
 170    *        ||w5 (2n+1)|        ||w1 (2n+1)|
 171    *  + | w6 (w6n)|        ||w2 (2n+1)| w0 (2n) |  (share storage with r)
 172    *  -----------------------------------------------
 173    *  r |    |    |    |    |    |    |    |    |
 174    *        c7   c6   c5   c4   c3                 Carries to propagate
 175    */
 176
 177   cy = mpn_add_n (rp + n, rp + n, w1, 2*n);
 178   MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy);
 179   cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
 180   MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
 181   cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
 182   MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);
 183   cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
 184   MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
 185   if (w6n > n + 1)
 186     {
 187       mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
 188       MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7);
 189     }
 190   else
 191     {
 192       ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
 193 #if WANT_ASSERT
 194       {
 195         mp_size_t i;
 196         for (i = w6n; i <= n; i++)
 197           ASSERT (w5[n + i] == 0);
 198       }
 199 #endif
 200     }
 201 }