8 #include <sys/resource.h>
/* Millisecond value built from rus.ru_utime: whole seconds scaled by 1000
   plus the microsecond part truncated to milliseconds.
   NOTE(review): the enclosing function and the call that fills `rus` are
   not visible in this excerpt.  */
16 return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
/* Fall back to 1000000 when no header has defined CLOCKS_PER_SEC.  */
21 #ifndef CLOCKS_PER_SEC
22 #define CLOCKS_PER_SEC 1000000
/* CLOCK_TO_MILLISEC (cl) converts a tick count to milliseconds.  When
   CLOCKS_PER_SEC is at least 10000 the count is divided by the number of
   ticks per millisecond, so the count itself is never multiplied.  */
25 #if CLOCKS_PER_SEC >= 10000
26 #define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
/* Second definition: multiply by 1000 first, then divide.
   NOTE(review): presumably the #else arm for smaller CLOCKS_PER_SEC
   values -- the #else/#endif lines are not visible here; confirm.  */
28 #define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
/* Result of clock () expressed in milliseconds.  */
34 return CLOCK_TO_MILLISEC (clock ());
/* Choose CLOCK according to which architecture macro the compiler
   predefines.  CLOCK is multiplied by the elapsed milliseconds further
   down to produce the "cycles/limb" figure.
   NOTE(review): only the __i386__ value is visible in this excerpt; the
   other arms' #define lines are missing.  `M` is presumably a
   megahertz multiplier defined elsewhere -- confirm.  */
41 #if defined (__m88k__)
43 #elif defined (__i386__)
44 #define CLOCK (16.666667 M)
45 #elif defined (__m68k__)
47 #elif defined (_IBMR2)
49 #elif defined (__sparc__)
51 #elif defined (__sun__)
53 #elif defined (__mips)
55 #elif defined (__hppa__)
57 #elif defined (__alpha)
/* Stops the build when reached.  NOTE(review): presumably the final
   #else arm for unrecognized machines -- the #else line is not visible.  */
60 #error "Don't know CLOCK of your machine"
/* TIMES is the repetition count of each timing loop below; OPS is the
   divisor used when converting elapsed time to cycles per limb.  Each is
   defined here in terms of the other and SIZE.
   NOTE(review): the two definitions presumably live in alternative
   preprocessor branches (one of OPS/TIMES supplied externally) -- the
   conditionals are not visible; confirm.  Also note TIMES expands
   without outer parentheses.  */
71 #define TIMES OPS/SIZE
74 #define OPS (SIZE*TIMES)
/* Reference routine whose result the test driver compares against
   mpn_addmul_1.  From the visible statements: each s1_ptr[j] is
   multiplied by s2_limb into a two-limb product, a running carry
   (cy_limb) is folded in, and the low limb is added into res_ptr[j].
   The driver stores the return value in a carry variable (cyx).
   NOTE(review): this excerpt omits several lines (the s1_size
   declaration, the loop header, the load of `x`, the return statement);
   the notes below hedge where they depend on those lines.  */
78 refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
79 register mp_ptr res_ptr;
80 register mp_srcptr s1_ptr;
82 register mp_limb_t s2_limb;
84 register mp_limb_t cy_limb;
86 register mp_limb_t prod_high, prod_low;
89 /* The loop counter and index J go from -SIZE to -1. This way
90 the loop becomes faster. */
93 /* Offset the base pointers to compensate for the negative indices. */
/* Full double-limb product of the current source limb and s2_limb.  */
100 umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
/* New carry = high product limb plus 1 if prod_low is below the old
   carry.  NOTE(review): presumably preceded by `prod_low += cy_limb`,
   making the comparison a wrap-around test -- that line is not visible.  */
103 cy_limb = (prod_low < cy_limb) + prod_high;
/* Add x into the low limb; a sum smaller than x means the addition
   wrapped, so bump the carry.  NOTE(review): x is presumably the old
   res_ptr[j] -- its assignment is not visible.  */
106 prod_low = x + prod_low;
107 cy_limb += (prod_low < x);
108 res_ptr[j] = prod_low;
/* Test driver body: times refmpn_addmul_1 and mpn_addmul_1 on random
   operands, then checks that both produce the same carry and limbs.
   NOTE(review): the function header, several declarations, and the
   timer reads that set `t` are not visible in this excerpt.  */
/* Two extra limbs per buffer: the operand lives at index 1..size, with
   index 0 and index size+1 serving as guard limbs checked after the run.  */
120 mp_limb_t dx[SIZE+2];
121 mp_limb_t dy[SIZE+2];
/* No termination condition in the loop header.  */
130 for (test = 0; ; test++)
/* Operand length in the range 1..SIZE.  */
133 size = (random () % SIZE + 1);
138 mpn_random2 (s1, size);
139 mpn_random2 (dy+1, size);
/* Roughly one iteration in 256 takes this branch (body not visible).  */
141 if (random () % 0x100 == 0)
144 mpn_random2 (&xlimb, 1);
/* Upper guard limb.  NOTE(review): the lower guard dy[0] is presumably
   set to 0x87654321 on a line not shown -- see the final check.  */
146 dy[size+1] = 0x12345678;
149 #if defined (PRINT) || defined (XPRINT)
/* NOTE(review): %lX assumes mp_limb_t is an unsigned long-sized type -- confirm.  */
150 printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
153 mpn_print (dy+1, size);
154 mpn_print (s1, size);
/* Timing pass for the reference routine, working on a copy (guards
   included) of dy.  */
157 MPN_COPY (dx, dy, size+2);
159 for (i = 0; i < TIMES; i++)
160 cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
/* t is printed with an "ms" suffix; t * CLOCK / (OPS * 1000.0) is
   reported as cycles per limb.  */
163 cyc = ((double) t * CLOCK) / (OPS * 1000.0);
164 printf ("refmpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
/* Throughput figure: CLOCK/cyc scaled by BITS_PER_MP_LIMB squared.  */
167 CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
/* Same timing pass for mpn_addmul_1, again starting from a fresh copy.  */
170 MPN_COPY (dx, dy, size+2);
172 for (i = 0; i < TIMES; i++)
173 cyy = mpn_addmul_1 (dx+1, s1, size, xlimb);
176 cyc = ((double) t * CLOCK) / (OPS * 1000.0);
177 printf ("mpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
180 CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
/* Correctness pass: both buffers start from identical contents; the
   reference routine updates dx and mpn_addmul_1 updates dy.  */
183 MPN_COPY (dx, dy, size+2);
184 cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
185 cyy = mpn_addmul_1 (dy+1, s1, size, xlimb);
188 printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
189 mpn_print (dx+1, size);
190 printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
191 mpn_print (dy+1, size);
/* Failure when the carries differ, when any limb (guards included)
   differs between dx and dy, or when either guard value in dx changed.  */
195 if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
196 || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
/* Dump both carries and both results for the failing case.  */
199 printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
200 mpn_print (dx+1, size);
201 printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
202 mpn_print (dy+1, size);
/* Print the SIZE limbs at P, walking from index size-1 down to index 0.
   Each limb is written as upper-case hex, zero-padded to
   2 * sizeof (mp_limb_t) digits.
   NOTE(review): %lX assumes mp_limb_t is an unsigned long-sized type;
   braces and any trailing output are not visible in this excerpt.  */
210 mpn_print (mp_ptr p, mp_size_t size)
214 for (i = size - 1; i >= 0; i--)
216 printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);