Update LibreSSL from version 2.4.4 => 2.9.1
[dragonfly.git] / crypto / libressl / crypto / bn / bn_mul.c
1 /* $OpenBSD: bn_mul.c,v 1.20 2015/02/09 15:49:22 jsing Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63
64 #include <assert.h>
65 #include <stdio.h>
66 #include <string.h>
67
68 #include <openssl/opensslconf.h>
69
70 #include "bn_lcl.h"
71
72 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
73 /* Here follows specialised variants of bn_add_words() and
74    bn_sub_words().  They have the property performing operations on
75    arrays of different sizes.  The sizes of those arrays is expressed through
76    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
77    which is the delta between the two lengths, calculated as len(a)-len(b).
78    All lengths are the number of BN_ULONGs...  For the operations that require
79    a result array as parameter, it must have the length cl+abs(dl).
80    These functions should probably end up in bn_asm.c as soon as there are
81    assembler counterparts for the systems that use assembler files.  */
82
83 BN_ULONG
84 bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
85     int dl)
86 {
87         BN_ULONG c, t;
88
89         assert(cl >= 0);
90         c = bn_sub_words(r, a, b, cl);
91
92         if (dl == 0)
93                 return c;
94
95         r += cl;
96         a += cl;
97         b += cl;
98
99         if (dl < 0) {
100 #ifdef BN_COUNT
101                 fprintf(stderr,
102                     "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
103                     cl, dl, c);
104 #endif
105                 for (;;) {
106                         t = b[0];
107                         r[0] = (0 - t - c) & BN_MASK2;
108                         if (t != 0)
109                                 c = 1;
110                         if (++dl >= 0)
111                                 break;
112
113                         t = b[1];
114                         r[1] = (0 - t - c) & BN_MASK2;
115                         if (t != 0)
116                                 c = 1;
117                         if (++dl >= 0)
118                                 break;
119
120                         t = b[2];
121                         r[2] = (0 - t - c) & BN_MASK2;
122                         if (t != 0)
123                                 c = 1;
124                         if (++dl >= 0)
125                                 break;
126
127                         t = b[3];
128                         r[3] = (0 - t - c) & BN_MASK2;
129                         if (t != 0)
130                                 c = 1;
131                         if (++dl >= 0)
132                                 break;
133
134                         b += 4;
135                         r += 4;
136                 }
137         } else {
138                 int save_dl = dl;
139 #ifdef BN_COUNT
140                 fprintf(stderr,
141                     "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
142                     cl, dl, c);
143 #endif
144                 while (c) {
145                         t = a[0];
146                         r[0] = (t - c) & BN_MASK2;
147                         if (t != 0)
148                                 c = 0;
149                         if (--dl <= 0)
150                                 break;
151
152                         t = a[1];
153                         r[1] = (t - c) & BN_MASK2;
154                         if (t != 0)
155                                 c = 0;
156                         if (--dl <= 0)
157                                 break;
158
159                         t = a[2];
160                         r[2] = (t - c) & BN_MASK2;
161                         if (t != 0)
162                                 c = 0;
163                         if (--dl <= 0)
164                                 break;
165
166                         t = a[3];
167                         r[3] = (t - c) & BN_MASK2;
168                         if (t != 0)
169                                 c = 0;
170                         if (--dl <= 0)
171                                 break;
172
173                         save_dl = dl;
174                         a += 4;
175                         r += 4;
176                 }
177                 if (dl > 0) {
178 #ifdef BN_COUNT
179                         fprintf(stderr,
180                             "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
181                             cl, dl);
182 #endif
183                         if (save_dl > dl) {
184                                 switch (save_dl - dl) {
185                                 case 1:
186                                         r[1] = a[1];
187                                         if (--dl <= 0)
188                                                 break;
189                                 case 2:
190                                         r[2] = a[2];
191                                         if (--dl <= 0)
192                                                 break;
193                                 case 3:
194                                         r[3] = a[3];
195                                         if (--dl <= 0)
196                                                 break;
197                                 }
198                                 a += 4;
199                                 r += 4;
200                         }
201                 }
202                 if (dl > 0) {
203 #ifdef BN_COUNT
204                         fprintf(stderr,
205                             "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
206                             cl, dl);
207 #endif
208                         for (;;) {
209                                 r[0] = a[0];
210                                 if (--dl <= 0)
211                                         break;
212                                 r[1] = a[1];
213                                 if (--dl <= 0)
214                                         break;
215                                 r[2] = a[2];
216                                 if (--dl <= 0)
217                                         break;
218                                 r[3] = a[3];
219                                 if (--dl <= 0)
220                                         break;
221
222                                 a += 4;
223                                 r += 4;
224                         }
225                 }
226         }
227         return c;
228 }
229 #endif
230
231 BN_ULONG
232 bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
233     int dl)
234 {
235         BN_ULONG c, l, t;
236
237         assert(cl >= 0);
238         c = bn_add_words(r, a, b, cl);
239
240         if (dl == 0)
241                 return c;
242
243         r += cl;
244         a += cl;
245         b += cl;
246
247         if (dl < 0) {
248                 int save_dl = dl;
249 #ifdef BN_COUNT
250                 fprintf(stderr,
251                     "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
252                     cl, dl, c);
253 #endif
254                 while (c) {
255                         l = (c + b[0]) & BN_MASK2;
256                         c = (l < c);
257                         r[0] = l;
258                         if (++dl >= 0)
259                                 break;
260
261                         l = (c + b[1]) & BN_MASK2;
262                         c = (l < c);
263                         r[1] = l;
264                         if (++dl >= 0)
265                                 break;
266
267                         l = (c + b[2]) & BN_MASK2;
268                         c = (l < c);
269                         r[2] = l;
270                         if (++dl >= 0)
271                                 break;
272
273                         l = (c + b[3]) & BN_MASK2;
274                         c = (l < c);
275                         r[3] = l;
276                         if (++dl >= 0)
277                                 break;
278
279                         save_dl = dl;
280                         b += 4;
281                         r += 4;
282                 }
283                 if (dl < 0) {
284 #ifdef BN_COUNT
285                         fprintf(stderr,
286                             "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
287                             cl, dl);
288 #endif
289                         if (save_dl < dl) {
290                                 switch (dl - save_dl) {
291                                 case 1:
292                                         r[1] = b[1];
293                                         if (++dl >= 0)
294                                                 break;
295                                 case 2:
296                                         r[2] = b[2];
297                                         if (++dl >= 0)
298                                                 break;
299                                 case 3:
300                                         r[3] = b[3];
301                                         if (++dl >= 0)
302                                                 break;
303                                 }
304                                 b += 4;
305                                 r += 4;
306                         }
307                 }
308                 if (dl < 0) {
309 #ifdef BN_COUNT
310                         fprintf(stderr,
311                             "  bn_add_part_words %d + %d (dl < 0, copy)\n",
312                             cl, dl);
313 #endif
314                         for (;;) {
315                                 r[0] = b[0];
316                                 if (++dl >= 0)
317                                         break;
318                                 r[1] = b[1];
319                                 if (++dl >= 0)
320                                         break;
321                                 r[2] = b[2];
322                                 if (++dl >= 0)
323                                         break;
324                                 r[3] = b[3];
325                                 if (++dl >= 0)
326                                         break;
327
328                                 b += 4;
329                                 r += 4;
330                         }
331                 }
332         } else {
333                 int save_dl = dl;
334 #ifdef BN_COUNT
335                 fprintf(stderr,
336                     "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
337 #endif
338                 while (c) {
339                         t = (a[0] + c) & BN_MASK2;
340                         c = (t < c);
341                         r[0] = t;
342                         if (--dl <= 0)
343                                 break;
344
345                         t = (a[1] + c) & BN_MASK2;
346                         c = (t < c);
347                         r[1] = t;
348                         if (--dl <= 0)
349                                 break;
350
351                         t = (a[2] + c) & BN_MASK2;
352                         c = (t < c);
353                         r[2] = t;
354                         if (--dl <= 0)
355                                 break;
356
357                         t = (a[3] + c) & BN_MASK2;
358                         c = (t < c);
359                         r[3] = t;
360                         if (--dl <= 0)
361                                 break;
362
363                         save_dl = dl;
364                         a += 4;
365                         r += 4;
366                 }
367 #ifdef BN_COUNT
368                 fprintf(stderr,
369                     "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
370 #endif
371                 if (dl > 0) {
372                         if (save_dl > dl) {
373                                 switch (save_dl - dl) {
374                                 case 1:
375                                         r[1] = a[1];
376                                         if (--dl <= 0)
377                                                 break;
378                                 case 2:
379                                         r[2] = a[2];
380                                         if (--dl <= 0)
381                                                 break;
382                                 case 3:
383                                         r[3] = a[3];
384                                         if (--dl <= 0)
385                                                 break;
386                                 }
387                                 a += 4;
388                                 r += 4;
389                         }
390                 }
391                 if (dl > 0) {
392 #ifdef BN_COUNT
393                         fprintf(stderr,
394                             "  bn_add_part_words %d + %d (dl > 0, copy)\n",
395                             cl, dl);
396 #endif
397                         for (;;) {
398                                 r[0] = a[0];
399                                 if (--dl <= 0)
400                                         break;
401                                 r[1] = a[1];
402                                 if (--dl <= 0)
403                                         break;
404                                 r[2] = a[2];
405                                 if (--dl <= 0)
406                                         break;
407                                 r[3] = a[3];
408                                 if (--dl <= 0)
409                                         break;
410
411                                 a += 4;
412                                 r += 4;
413                         }
414                 }
415         }
416         return c;
417 }
418
419 #ifdef BN_RECURSION
420 /* Karatsuba recursive multiplication algorithm
421  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
422
423 /* r is 2*n2 words in size,
424  * a and b are both n2 words in size.
425  * n2 must be a power of 2.
426  * We multiply and return the result.
427  * t must be 2*n2 words in size
428  * We calculate
429  * a[0]*b[0]
430  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
431  * a[1]*b[1]
432  */
433 /* dnX may not be positive, but n2/2+dnX has to be */
434 void
435 bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
436     int dnb, BN_ULONG *t)
437 {
438         int n = n2 / 2, c1, c2;
439         int tna = n + dna, tnb = n + dnb;
440         unsigned int neg, zero;
441         BN_ULONG ln, lo, *p;
442
443 # ifdef BN_COUNT
444         fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
445 # endif
446 # ifdef BN_MUL_COMBA
447 #  if 0
448         if (n2 == 4) {
449                 bn_mul_comba4(r, a, b);
450                 return;
451         }
452 #  endif
453         /* Only call bn_mul_comba 8 if n2 == 8 and the
454          * two arrays are complete [steve]
455          */
456         if (n2 == 8 && dna == 0 && dnb == 0) {
457                 bn_mul_comba8(r, a, b);
458                 return;
459         }
460 # endif /* BN_MUL_COMBA */
461         /* Else do normal multiply */
462         if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
463                 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
464                 if ((dna + dnb) < 0)
465                         memset(&r[2*n2 + dna + dnb], 0,
466                             sizeof(BN_ULONG) * -(dna + dnb));
467                 return;
468         }
469         /* r=(a[0]-a[1])*(b[1]-b[0]) */
470         c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
471         c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
472         zero = neg = 0;
473         switch (c1 * 3 + c2) {
474         case -4:
475                 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
476                 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
477                 break;
478         case -3:
479                 zero = 1;
480                 break;
481         case -2:
482                 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
483                 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
484                 neg = 1;
485                 break;
486         case -1:
487         case 0:
488         case 1:
489                 zero = 1;
490                 break;
491         case 2:
492                 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
493                 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
494                 neg = 1;
495                 break;
496         case 3:
497                 zero = 1;
498                 break;
499         case 4:
500                 bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
501                 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
502                 break;
503         }
504
505 # ifdef BN_MUL_COMBA
506         if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
507                                                extra args to do this well */
508         {
509                 if (!zero)
510                         bn_mul_comba4(&(t[n2]), t, &(t[n]));
511                 else
512                         memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
513
514                 bn_mul_comba4(r, a, b);
515                 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
516         } else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
517                                                     take extra args to do this
518                                                     well */
519         {
520                 if (!zero)
521                         bn_mul_comba8(&(t[n2]), t, &(t[n]));
522                 else
523                         memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
524
525                 bn_mul_comba8(r, a, b);
526                 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
527         } else
528 # endif /* BN_MUL_COMBA */
529         {
530                 p = &(t[n2 * 2]);
531                 if (!zero)
532                         bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
533                 else
534                         memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
535                 bn_mul_recursive(r, a, b, n, 0, 0, p);
536                 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
537         }
538
539         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
540          * r[10] holds (a[0]*b[0])
541          * r[32] holds (b[1]*b[1])
542          */
543
544         c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
545
546         if (neg) /* if t[32] is negative */
547         {
548                 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
549         } else {
550                 /* Might have a carry */
551                 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
552         }
553
554         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
555          * r[10] holds (a[0]*b[0])
556          * r[32] holds (b[1]*b[1])
557          * c1 holds the carry bits
558          */
559         c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
560         if (c1) {
561                 p = &(r[n + n2]);
562                 lo= *p;
563                 ln = (lo + c1) & BN_MASK2;
564                 *p = ln;
565
566                 /* The overflow will stop before we over write
567                  * words we should not overwrite */
568                 if (ln < (BN_ULONG)c1) {
569                         do {
570                                 p++;
571                                 lo= *p;
572                                 ln = (lo + 1) & BN_MASK2;
573                                 *p = ln;
574                         } while (ln == 0);
575                 }
576         }
577 }
578
579 /* n+tn is the word length
580  * t needs to be n*4 is size, as does r */
581 /* tnX may not be negative but less than n */
582 void
583 bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
584     int tnb, BN_ULONG *t)
585 {
586         int i, j, n2 = n * 2;
587         int c1, c2, neg;
588         BN_ULONG ln, lo, *p;
589
590 # ifdef BN_COUNT
591         fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
592             n, tna, n, tnb);
593 # endif
594         if (n < 8) {
595                 bn_mul_normal(r, a, n + tna, b, n + tnb);
596                 return;
597         }
598
599         /* r=(a[0]-a[1])*(b[1]-b[0]) */
600         c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
601         c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
602         neg = 0;
603         switch (c1 * 3 + c2) {
604         case -4:
605                 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
606                 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
607                 break;
608         case -3:
609                 /* break; */
610         case -2:
611                 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
612                 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
613                 neg = 1;
614                 break;
615         case -1:
616         case 0:
617         case 1:
618                 /* break; */
619         case 2:
620                 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
621                 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
622                 neg = 1;
623                 break;
624         case 3:
625                 /* break; */
626         case 4:
627                 bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
628                 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
629                 break;
630         }
631                 /* The zero case isn't yet implemented here. The speedup
632                    would probably be negligible. */
633 # if 0
634         if (n == 4) {
635                 bn_mul_comba4(&(t[n2]), t, &(t[n]));
636                 bn_mul_comba4(r, a, b);
637                 bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
638                 memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
639         } else
640 # endif
641                 if (n == 8) {
642                 bn_mul_comba8(&(t[n2]), t, &(t[n]));
643                 bn_mul_comba8(r, a, b);
644                 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
645                 memset(&(r[n2 + tna + tnb]), 0,
646                     sizeof(BN_ULONG) * (n2 - tna - tnb));
647         } else {
648                 p = &(t[n2*2]);
649                 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
650                 bn_mul_recursive(r, a, b, n, 0, 0, p);
651                 i = n / 2;
652                 /* If there is only a bottom half to the number,
653                  * just do it */
654                 if (tna > tnb)
655                         j = tna - i;
656                 else
657                         j = tnb - i;
658                 if (j == 0) {
659                         bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
660                             i, tna - i, tnb - i, p);
661                         memset(&(r[n2 + i * 2]), 0,
662                             sizeof(BN_ULONG) * (n2 - i * 2));
663                 }
664                 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
665                 {
666                         bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
667                             i, tna - i, tnb - i, p);
668                         memset(&(r[n2 + tna + tnb]), 0,
669                             sizeof(BN_ULONG) * (n2 - tna - tnb));
670                 }
671                 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
672                 {
673                         memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
674                         if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
675                             tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
676                                 bn_mul_normal(&(r[n2]), &(a[n]), tna,
677                                     &(b[n]), tnb);
678                         } else {
679                                 for (;;) {
680                                         i /= 2;
681                                         /* these simplified conditions work
682                                          * exclusively because difference
683                                          * between tna and tnb is 1 or 0 */
684                                         if (i < tna || i < tnb) {
685                                                 bn_mul_part_recursive(&(r[n2]),
686                                                     &(a[n]), &(b[n]), i,
687                                                     tna - i, tnb - i, p);
688                                                 break;
689                                         } else if (i == tna || i == tnb) {
690                                                 bn_mul_recursive(&(r[n2]),
691                                                     &(a[n]), &(b[n]), i,
692                                                     tna - i, tnb - i, p);
693                                                 break;
694                                         }
695                                 }
696                         }
697                 }
698         }
699
700         /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
701          * r[10] holds (a[0]*b[0])
702          * r[32] holds (b[1]*b[1])
703          */
704
705         c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
706
707         if (neg) /* if t[32] is negative */
708         {
709                 c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
710         } else {
711                 /* Might have a carry */
712                 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
713         }
714
715         /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
716          * r[10] holds (a[0]*b[0])
717          * r[32] holds (b[1]*b[1])
718          * c1 holds the carry bits
719          */
720         c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
721         if (c1) {
722                 p = &(r[n + n2]);
723                 lo= *p;
724                 ln = (lo + c1)&BN_MASK2;
725                 *p = ln;
726
727                 /* The overflow will stop before we over write
728                  * words we should not overwrite */
729                 if (ln < (BN_ULONG)c1) {
730                         do {
731                                 p++;
732                                 lo= *p;
733                                 ln = (lo + 1) & BN_MASK2;
734                                 *p = ln;
735                         } while (ln == 0);
736                 }
737         }
738 }
739
740 /* a and b must be the same size, which is n2.
741  * r needs to be n2 words and t needs to be n2*2
742  */
743 void
744 bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
745 {
746         int n = n2 / 2;
747
748 # ifdef BN_COUNT
749         fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
750 # endif
751
752         bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
753         if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
754                 bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
755                 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
756                 bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
757                 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
758         } else {
759                 bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
760                 bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
761                 bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
762                 bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
763         }
764 }
765
766 /* a and b must be the same size, which is n2.
767  * r needs to be n2 words and t needs to be n2*2
768  * l is the low words of the output.
769  * t needs to be n2*3
770  */
771 void
772 bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
773     BN_ULONG *t)
774 {
775         int i, n;
776         int c1, c2;
777         int neg, oneg, zero;
778         BN_ULONG ll, lc, *lp, *mp;
779
780 # ifdef BN_COUNT
781         fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
782 # endif
783         n = n2 / 2;
784
785         /* Calculate (al-ah)*(bh-bl) */
786         neg = zero = 0;
787         c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
788         c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
789         switch (c1 * 3 + c2) {
790         case -4:
791                 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
792                 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
793                 break;
794         case -3:
795                 zero = 1;
796                 break;
797         case -2:
798                 bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
799                 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
800                 neg = 1;
801                 break;
802         case -1:
803         case 0:
804         case 1:
805                 zero = 1;
806                 break;
807         case 2:
808                 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
809                 bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
810                 neg = 1;
811                 break;
812         case 3:
813                 zero = 1;
814                 break;
815         case 4:
816                 bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
817                 bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
818                 break;
819         }
820
821         oneg = neg;
822         /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
823         /* r[10] = (a[1]*b[1]) */
824 # ifdef BN_MUL_COMBA
825         if (n == 8) {
826                 bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
827                 bn_mul_comba8(r, &(a[n]), &(b[n]));
828         } else
829 # endif
830         {
831                 bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
832                 bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
833         }
834
835         /* s0 == low(al*bl)
836          * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
837          * We know s0 and s1 so the only unknown is high(al*bl)
838          * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
839          * high(al*bl) == s1 - (r[0]+l[0]+t[0])
840          */
841         if (l != NULL) {
842                 lp = &(t[n2 + n]);
843                 c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
844         } else {
845                 c1 = 0;
846                 lp = &(r[0]);
847         }
848
849         if (neg)
850                 neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
851         else {
852                 bn_add_words(&(t[n2]), lp, &(t[0]), n);
853                 neg = 0;
854         }
855
856         if (l != NULL) {
857                 bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
858         } else {
859                 lp = &(t[n2 + n]);
860                 mp = &(t[n2]);
861                 for (i = 0; i < n; i++)
862                         lp[i] = ((~mp[i]) + 1) & BN_MASK2;
863         }
864
865         /* s[0] = low(al*bl)
866          * t[3] = high(al*bl)
867          * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
868          * r[10] = (a[1]*b[1])
869          */
870         /* R[10] = al*bl
871          * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
872          * R[32] = ah*bh
873          */
874         /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
875          * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
876          * R[3]=r[1]+(carry/borrow)
877          */
878         if (l != NULL) {
879                 lp = &(t[n2]);
880                 c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
881         } else {
882                 lp = &(t[n2 + n]);
883                 c1 = 0;
884         }
885         c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
886         if (oneg)
887                 c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
888         else
889                 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
890
891         c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
892         c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
893         if (oneg)
894                 c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
895         else
896                 c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
897
898         if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
899         {
900                 i = 0;
901                 if (c1 > 0) {
902                         lc = c1;
903                         do {
904                                 ll = (r[i] + lc) & BN_MASK2;
905                                 r[i++] = ll;
906                                 lc = (lc > ll);
907                         } while (lc);
908                 } else {
909                         lc = -c1;
910                         do {
911                                 ll = r[i];
912                                 r[i++] = (ll - lc) & BN_MASK2;
913                                 lc = (lc > ll);
914                         } while (lc);
915                 }
916         }
917         if (c2 != 0) /* Add starting at r[1] */
918         {
919                 i = n;
920                 if (c2 > 0) {
921                         lc = c2;
922                         do {
923                                 ll = (r[i] + lc) & BN_MASK2;
924                                 r[i++] = ll;
925                                 lc = (lc > ll);
926                         } while (lc);
927                 } else {
928                         lc = -c2;
929                         do {
930                                 ll = r[i];
931                                 r[i++] = (ll - lc) & BN_MASK2;
932                                 lc = (lc > ll);
933                         } while (lc);
934                 }
935         }
936 }
937 #endif /* BN_RECURSION */
938
939 int
940 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
941 {
942         int ret = 0;
943         int top, al, bl;
944         BIGNUM *rr;
945 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
946         int i;
947 #endif
948 #ifdef BN_RECURSION
949         BIGNUM *t = NULL;
950         int j = 0, k;
951 #endif
952
953 #ifdef BN_COUNT
954         fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
955 #endif
956
957         bn_check_top(a);
958         bn_check_top(b);
959         bn_check_top(r);
960
961         al = a->top;
962         bl = b->top;
963
964         if ((al == 0) || (bl == 0)) {
965                 BN_zero(r);
966                 return (1);
967         }
968         top = al + bl;
969
970         BN_CTX_start(ctx);
971         if ((r == a) || (r == b)) {
972                 if ((rr = BN_CTX_get(ctx)) == NULL)
973                         goto err;
974         } else
975                 rr = r;
976         rr->neg = a->neg ^ b->neg;
977
978 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
979         i = al - bl;
980 #endif
981 #ifdef BN_MUL_COMBA
982         if (i == 0) {
983 # if 0
984                 if (al == 4) {
985                         if (bn_wexpand(rr, 8) == NULL)
986                                 goto err;
987                         rr->top = 8;
988                         bn_mul_comba4(rr->d, a->d, b->d);
989                         goto end;
990                 }
991 # endif
992                 if (al == 8) {
993                         if (bn_wexpand(rr, 16) == NULL)
994                                 goto err;
995                         rr->top = 16;
996                         bn_mul_comba8(rr->d, a->d, b->d);
997                         goto end;
998                 }
999         }
1000 #endif /* BN_MUL_COMBA */
1001 #ifdef BN_RECURSION
1002         if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1003                 if (i >= -1 && i <= 1) {
1004                         /* Find out the power of two lower or equal
1005                            to the longest of the two numbers */
1006                         if (i >= 0) {
1007                                 j = BN_num_bits_word((BN_ULONG)al);
1008                         }
1009                         if (i == -1) {
1010                                 j = BN_num_bits_word((BN_ULONG)bl);
1011                         }
1012                         j = 1 << (j - 1);
1013                         assert(j <= al || j <= bl);
1014                         k = j + j;
1015                         if ((t = BN_CTX_get(ctx)) == NULL)
1016                                 goto err;
1017                         if (al > j || bl > j) {
1018                                 if (bn_wexpand(t, k * 4) == NULL)
1019                                         goto err;
1020                                 if (bn_wexpand(rr, k * 4) == NULL)
1021                                         goto err;
1022                                 bn_mul_part_recursive(rr->d, a->d, b->d,
1023                                     j, al - j, bl - j, t->d);
1024                         }
1025                         else    /* al <= j || bl <= j */
1026                         {
1027                                 if (bn_wexpand(t, k * 2) == NULL)
1028                                         goto err;
1029                                 if (bn_wexpand(rr, k * 2) == NULL)
1030                                         goto err;
1031                                 bn_mul_recursive(rr->d, a->d, b->d,
1032                                     j, al - j, bl - j, t->d);
1033                         }
1034                         rr->top = top;
1035                         goto end;
1036                 }
1037 #if 0
1038                 if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1039                         BIGNUM *tmp_bn = (BIGNUM *)b;
1040                         if (bn_wexpand(tmp_bn, al) == NULL)
1041                                 goto err;
1042                         tmp_bn->d[bl] = 0;
1043                         bl++;
1044                         i--;
1045                 } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1046                         BIGNUM *tmp_bn = (BIGNUM *)a;
1047                         if (bn_wexpand(tmp_bn, bl) == NULL)
1048                                 goto err;
1049                         tmp_bn->d[al] = 0;
1050                         al++;
1051                         i++;
1052                 }
1053                 if (i == 0) {
1054                         /* symmetric and > 4 */
1055                         /* 16 or larger */
1056                         j = BN_num_bits_word((BN_ULONG)al);
1057                         j = 1 << (j - 1);
1058                         k = j + j;
1059                         if ((t = BN_CTX_get(ctx)) == NULL)
1060                                 goto err;
1061                         if (al == j) /* exact multiple */
1062                         {
1063                                 if (bn_wexpand(t, k * 2) == NULL)
1064                                         goto err;
1065                                 if (bn_wexpand(rr, k * 2) == NULL)
1066                                         goto err;
1067                                 bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1068                         } else {
1069                                 if (bn_wexpand(t, k * 4) == NULL)
1070                                         goto err;
1071                                 if (bn_wexpand(rr, k * 4) == NULL)
1072                                         goto err;
1073                                 bn_mul_part_recursive(rr->d, a->d, b->d,
1074                                     al - j, j, t->d);
1075                         }
1076                         rr->top = top;
1077                         goto end;
1078                 }
1079 #endif
1080         }
1081 #endif /* BN_RECURSION */
1082         if (bn_wexpand(rr, top) == NULL)
1083                 goto err;
1084         rr->top = top;
1085         bn_mul_normal(rr->d, a->d, al, b->d, bl);
1086
1087 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1088 end:
1089 #endif
1090         bn_correct_top(rr);
1091         if (r != rr)
1092                 BN_copy(r, rr);
1093         ret = 1;
1094 err:
1095         bn_check_top(r);
1096         BN_CTX_end(ctx);
1097         return (ret);
1098 }
1099
1100 void
1101 bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1102 {
1103         BN_ULONG *rr;
1104
1105 #ifdef BN_COUNT
1106         fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1107 #endif
1108
1109         if (na < nb) {
1110                 int itmp;
1111                 BN_ULONG *ltmp;
1112
1113                 itmp = na;
1114                 na = nb;
1115                 nb = itmp;
1116                 ltmp = a;
1117                 a = b;
1118                 b = ltmp;
1119
1120         }
1121         rr = &(r[na]);
1122         if (nb <= 0) {
1123                 (void)bn_mul_words(r, a, na, 0);
1124                 return;
1125         } else
1126                 rr[0] = bn_mul_words(r, a, na, b[0]);
1127
1128         for (;;) {
1129                 if (--nb <= 0)
1130                         return;
1131                 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1132                 if (--nb <= 0)
1133                         return;
1134                 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1135                 if (--nb <= 0)
1136                         return;
1137                 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1138                 if (--nb <= 0)
1139                         return;
1140                 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1141                 rr += 4;
1142                 r += 4;
1143                 b += 4;
1144         }
1145 }
1146
1147 void
1148 bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1149 {
1150 #ifdef BN_COUNT
1151         fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1152 #endif
1153         bn_mul_words(r, a, n, b[0]);
1154
1155         for (;;) {
1156                 if (--n <= 0)
1157                         return;
1158                 bn_mul_add_words(&(r[1]), a, n, b[1]);
1159                 if (--n <= 0)
1160                         return;
1161                 bn_mul_add_words(&(r[2]), a, n, b[2]);
1162                 if (--n <= 0)
1163                         return;
1164                 bn_mul_add_words(&(r[3]), a, n, b[3]);
1165                 if (--n <= 0)
1166                         return;
1167                 bn_mul_add_words(&(r[4]), a, n, b[4]);
1168                 r += 4;
1169                 b += 4;
1170         }
1171 }