contrib/gmp/mpn/generic/toom_interpolate_5pts.c

   1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
   2
   3    Contributed to the GNU project by Robert Harley.
   4    Improvements by Paul Zimmermann and Marco Bodrato.
   5
   6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   9
  10 Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2009 Free Software
  11 Foundation, Inc.
  12
  13 This file is part of the GNU MP Library.
  14
  15 The GNU MP Library is free software; you can redistribute it and/or modify it
  16 under the terms of the GNU Lesser General Public License as published by the
  17 Free Software Foundation; either version 3 of the License, or (at your
  18 option) any later version.
  19
  20 The GNU MP Library is distributed in the hope that it will be useful, but
  21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  22 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
  23 for more details.
  24
  25 You should have received a copy of the GNU Lesser General Public License
  26 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  27
  28 #include "gmp.h"
  29 #include "gmp-impl.h"
  30
  31 void
  32 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
  33                            mp_size_t k, mp_size_t twor, int sa,
  34                            mp_limb_t vinf0)
  35 {
  36   mp_limb_t cy, saved;
  37   mp_size_t twok;
  38   mp_size_t kk1;
  39   mp_ptr c1, v1, c3, vinf;
  40
  41   twok = k + k;
  42   kk1 = twok + 1;
  43
  44   c1 = c  + k;
  45   v1 = c1 + k;
  46   c3 = v1 + k;
  47   vinf = c3 + k;
  48
  49 #define v0 (c)
  50   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
  51      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
  52   */
  53   if (sa)
  54     ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
  55   else
  56     ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
  57
  58   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
  59        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
  60
  61   ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
  62                                                       /* (5 3 1 1 0)*/
  63
  64   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
  65        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
  66
  67   /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
  68      tm1 >= 0                                         (0  1 0  1 0)
  69      No carry comes out from {v1, kk1} +/- {vm1, kk1},
  70      and the division by two is exact.
  71      If (sa!=0) the sign of vm1 is negative */
  72   if (sa)
  73     {
  74 #ifdef HAVE_NATIVE_mpn_rsh1add_n
  75       mpn_rsh1add_n (vm1, v1, vm1, kk1);
  76 #else
  77       ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
  78       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
  79 #endif
  80     }
  81   else
  82     {
  83 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
  84       mpn_rsh1sub_n (vm1, v1, vm1, kk1);
  85 #else
  86       ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
  87       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
  88 #endif
  89     }
  90
  91   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
  92        v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
  93
  94   /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
  95      t1 >= 0
  96   */
  97   vinf[0] -= mpn_sub_n (v1, v1, c, twok);
  98
  99   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
 100        v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
 101
 102   /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
 103      t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
 104   */
 105 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
 106   mpn_rsh1sub_n (v2, v2, v1, kk1);
 107 #else
 108   ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
 109   ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
 110 #endif
 111
 112   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
 113        v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
 114
 115   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
 116      result is v1 >= 0
 117   */
 118   ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
 119
 120   /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
 121   cy = mpn_add_n (c1, c1, vm1, kk1);
 122   MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
 123   /* Memory allocated for vm1 is now free, it can be recycled ...*/
 124
 125   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
 126      result is v2 >= 0 */
 127   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
 128   vinf[0] = vinf0;       /* Set the right value for vinf0                     */
 129 #ifdef HAVE_NATIVE_mpn_sublsh1_n
 130   cy = mpn_sublsh1_n (v2, v2, vinf, twor);
 131 #else
 132   /* Overwrite unused vm1 */
 133   cy = mpn_lshift (vm1, vinf, twor, 1);
 134   cy += mpn_sub_n (v2, v2, vm1, twor);
 135 #endif
 136   MPN_DECR_U (v2 + twor, kk1 - twor, cy);
 137
 138   /* Current matrix is
 139      [1 0 0 0 0; vinf
 140       0 1 0 0 0; v2
 141       1 0 1 0 0; v1
 142       0 1 0 1 0; vm1
 143       0 0 0 0 1] v0
 144      Some vaues already are in-place (we added vm1 in the correct position)
 145      | vinf|  v1 |  v0 |
 146               | vm1 |
 147      One still is in a separated area
 148         | +v2 |
 149      We have to compute v1-=vinf; vm1 -= v2,
 150            |-vinf|
 151               | -v2 |
 152      Carefully reordering operations we can avoid to compute twice the sum
 153      of the high half of v2 plus the low half of vinf.
 154   */
 155
 156   /* Add the high half of t2 in {vinf} */
 157   if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
 158     cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
 159     MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
 160   } else { /* triggered only by very unbalanced cases like
 161               (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
 162     ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
 163   }
 164   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
 165      result is >= 0 */
 166   /* Side effect: we also subtracted (high half) vm1 -= v2 */
 167   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
 168   vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
 169   vinf[0] = saved;
 170   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
 171
 172   /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
 173      Operate only on the low half.
 174   */
 175   cy = mpn_sub_n (c1, c1, v2, k);
 176   MPN_DECR_U (v1, kk1, cy);
 177
 178   /********************* Beginning the final phase **********************/
 179
 180   /* Most of the recomposition was done */
 181
 182   /* add t2 in {c+3k, ...}, but only the low half */
 183   cy = mpn_add_n (c3, c3, v2, k);
 184   vinf[0] += cy;
 185   ASSERT(vinf[0] >= cy); /* No carry */
 186   MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
 187
 188 #undef v0
 189 }