f5af284d4e1c14578b077e2e29e663d7c3588785
[dragonfly.git] / secure / lib / libcrypto / asm / ecp_nistz256-x86_64.s
1 .text   
2
3
4
# Constants shared by the P-256 field routines in this file. Each .quad row is
# a 256-bit value stored as four 64-bit limbs, least-significant limb first.
5 .align  64
6 .Lpoly:
7 .quad   0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001   # p = 2^256 - 2^224 + 2^192 + 2^96 - 1
8
9
10 .LRR:
11 .quad   0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd   # NOTE(review): presumably 2^512 mod p (ecp_nistz256_to_mont multiplies by it) - confirm
12
13 .LOne:
14 .long   1,1,1,1,1,1,1,1   # eight 32-bit lanes of 1; start value and step of the index counter in select_w5/select_w7
15 .LTwo:
16 .long   2,2,2,2,2,2,2,2   # not referenced in the visible part of this file
17 .LThree:
18 .long   3,3,3,3,3,3,3,3   # not referenced in the visible part of this file
19 .LONE_mont:
20 .quad   0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe   # equals 2^256 - p
21
# ecp_nistz256_mul_by_2: res = 2*a mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a (four limbs read).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# The doubled value is treated as a 257-bit quantity (%r13 holds bit 256) and
# p is subtracted from all 257 bits. The borrow out of that subtraction alone
# decides between 2a and 2a-p, so the result is fully reduced whenever a < p.
# (Selecting only on the carry out of the doubling would leave sums in the
# range [p, 2^256) unreduced.)
22 .globl  ecp_nistz256_mul_by_2
23 .type   ecp_nistz256_mul_by_2,@function
24 .align  64
25 ecp_nistz256_mul_by_2:
26         pushq   %r12
27         pushq   %r13
28         xorq    %r13,%r13               # bit 256 of 2a; must be cleared before the carry chain starts
29         movq    0(%rsi),%r8
30         movq    8(%rsi),%r9
31         addq    %r8,%r8                 # r8..r11 = a + a, limb by limb
32         movq    16(%rsi),%r10
33         adcq    %r9,%r9
34         movq    24(%rsi),%r11
35         leaq    .Lpoly(%rip),%rsi       # a has been fully loaded; rsi now points at p
36         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the unreduced 2a
37         adcq    %r10,%r10
38         adcq    %r11,%r11
39         movq    %r9,%rdx
40         adcq    $0,%r13                 # r13 = carry out of the doubling (0 or 1)
41
42         subq    0(%rsi),%r8             # r13:r11..r8 = 2a - p
43         movq    %r10,%rcx
44         sbbq    8(%rsi),%r9
45         sbbq    16(%rsi),%r10
46         movq    %r11,%r12
47         sbbq    24(%rsi),%r11
48         sbbq    $0,%r13                 # CF = 1 exactly when 2a < p
49
50         cmovcq  %rax,%r8                # borrow: keep 2a; otherwise keep 2a - p
51         cmovcq  %rdx,%r9
52         movq    %r8,0(%rdi)
53         cmovcq  %rcx,%r10
54         movq    %r9,8(%rdi)
55         cmovcq  %r12,%r11
56         movq    %r10,16(%rdi)
57         movq    %r11,24(%rdi)
58
59         popq    %r13
60         popq    %r12
61         .byte   0xf3,0xc3               # repz ret
62 .size   ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
63
64
65
# ecp_nistz256_div_by_2: res = a/2 mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a (four limbs read).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# a + p is formed as a 257-bit value and kept only when a is odd, so the value
# that gets halved is always even; it is then shifted right by one bit.
66 .globl  ecp_nistz256_div_by_2
67 .type   ecp_nistz256_div_by_2,@function
68 .align  32
69 ecp_nistz256_div_by_2:
70         pushq   %r12
71         pushq   %r13
72
73         movq    0(%rsi),%r8
74         movq    8(%rsi),%r9
75         movq    16(%rsi),%r10
76         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the original a; rax also supplies the parity bit
77         movq    24(%rsi),%r11
78         leaq    .Lpoly(%rip),%rsi       # rsi now points at p
79
80         movq    %r9,%rdx
81         xorq    %r13,%r13               # r13 will receive bit 256 of a + p
82         addq    0(%rsi),%r8             # r13:r11..r8 = a + p
83         movq    %r10,%rcx
84         adcq    8(%rsi),%r9
85         adcq    16(%rsi),%r10
86         movq    %r11,%r12
87         adcq    24(%rsi),%r11
88         adcq    $0,%r13
89         xorq    %rsi,%rsi               # zero, used to clear r13 in the even case
90         testq   $1,%rax                 # ZF = 1 when a is even
91
92         cmovzq  %rax,%r8                # even: go back to a itself
93         cmovzq  %rdx,%r9
94         cmovzq  %rcx,%r10
95         cmovzq  %r12,%r11
96         cmovzq  %rsi,%r13
97
98         movq    %r9,%rax                # 257-bit right shift by one: each limb takes bit 0 of the limb above
99         shrq    $1,%r8
100         shlq    $63,%rax
101         movq    %r10,%rdx
102         shrq    $1,%r9
103         orq     %rax,%r8
104         shlq    $63,%rdx
105         movq    %r11,%rcx
106         shrq    $1,%r10
107         orq     %rdx,%r9
108         shlq    $63,%rcx
109         shrq    $1,%r11
110         shlq    $63,%r13                # bit 256 becomes the top bit of the highest limb
111         orq     %rcx,%r10
112         orq     %r13,%r11
113
114         movq    %r8,0(%rdi)
115         movq    %r9,8(%rdi)
116         movq    %r10,16(%rdi)
117         movq    %r11,24(%rdi)
118
119         popq    %r13
120         popq    %r12
121         .byte   0xf3,0xc3               # repz ret
122 .size   ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
123
124
125
# ecp_nistz256_mul_by_3: res = 3*a mod p, computed as (2a mod p) + a mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a (four limbs read,
#        re-read for the second addition, so %rsi is left untouched).
#   Scratch: %rax, %rcx, %rdx, %r8-%r11; %r12/%r13 are saved and restored.
# Each of the two additions is followed by a subtraction of p over all 257
# bits (%r13 holds bit 256); the final borrow selects between the sum and
# sum-p, so every intermediate and the result are fully reduced when a < p.
# The immediates $-1 and $0 stand for limbs 0 and 2 of .Lpoly.
126 .globl  ecp_nistz256_mul_by_3
127 .type   ecp_nistz256_mul_by_3,@function
128 .align  32
129 ecp_nistz256_mul_by_3:
130         pushq   %r12
131         pushq   %r13
132
133         movq    0(%rsi),%r8
134         xorq    %r13,%r13               # bit 256 of the first sum
135         movq    8(%rsi),%r9
136         addq    %r8,%r8                 # r8..r11 = a + a
137         movq    16(%rsi),%r10
138         adcq    %r9,%r9
139         movq    24(%rsi),%r11
140         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the unreduced sum
141         adcq    %r10,%r10
142         adcq    %r11,%r11
143         movq    %r9,%rdx
144         adcq    $0,%r13
145
146         subq    $-1,%r8                 # r13:r11..r8 = 2a - p
147         movq    %r10,%rcx
148         sbbq    .Lpoly+8(%rip),%r9
149         sbbq    $0,%r10
150         movq    %r11,%r12
151         sbbq    .Lpoly+24(%rip),%r11
152         sbbq    $0,%r13                 # CF = 1 exactly when 2a < p
153
154         cmovcq  %rax,%r8                # borrow: keep 2a; otherwise keep 2a - p
155         cmovcq  %rdx,%r9
156         cmovcq  %rcx,%r10
157         cmovcq  %r12,%r11
158
159         xorq    %r13,%r13               # bit 256 of the second sum
160         addq    0(%rsi),%r8             # r8..r11 = (2a mod p) + a
161         adcq    8(%rsi),%r9
162         movq    %r8,%rax
163         adcq    16(%rsi),%r10
164         adcq    24(%rsi),%r11
165         movq    %r9,%rdx
166         adcq    $0,%r13
167
168         subq    $-1,%r8                 # subtract p again
169         movq    %r10,%rcx
170         sbbq    .Lpoly+8(%rip),%r9
171         sbbq    $0,%r10
172         movq    %r11,%r12
173         sbbq    .Lpoly+24(%rip),%r11
174         sbbq    $0,%r13                 # CF = 1 exactly when the sum was already below p
175
176         cmovcq  %rax,%r8
177         cmovcq  %rdx,%r9
178         movq    %r8,0(%rdi)
179         cmovcq  %rcx,%r10
180         movq    %r9,8(%rdi)
181         cmovcq  %r12,%r11
182         movq    %r10,16(%rdi)
183         movq    %r11,24(%rdi)
184
185         popq    %r13
186         popq    %r12
187         .byte   0xf3,0xc3               # repz ret
188 .size   ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
189
190
191
# ecp_nistz256_add: res = a + b mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a, %rdx -> b
#        (four limbs read from each).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# The sum is kept as a 257-bit value (%r13 holds bit 256) and p is subtracted
# across all 257 bits; the resulting borrow selects between a+b and a+b-p, so
# the output is fully reduced whenever both inputs are below p. (Selecting
# only on the carry out of the addition would miss sums in [p, 2^256).)
192 .globl  ecp_nistz256_add
193 .type   ecp_nistz256_add,@function
194 .align  32
195 ecp_nistz256_add:
196         pushq   %r12
197         pushq   %r13
198
199         movq    0(%rsi),%r8
200         xorq    %r13,%r13               # bit 256 of the sum
201         movq    8(%rsi),%r9
202         movq    16(%rsi),%r10
203         movq    24(%rsi),%r11
204         leaq    .Lpoly(%rip),%rsi       # a is loaded; rsi now points at p
205
206         addq    0(%rdx),%r8             # r8..r11 = a + b
207         adcq    8(%rdx),%r9
208         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the unreduced sum
209         adcq    16(%rdx),%r10
210         adcq    24(%rdx),%r11
211         movq    %r9,%rdx                # b has been fully read; rdx is reused as scratch
212         adcq    $0,%r13
213
214         subq    0(%rsi),%r8             # r13:r11..r8 = a + b - p
215         movq    %r10,%rcx
216         sbbq    8(%rsi),%r9
217         sbbq    16(%rsi),%r10
218         movq    %r11,%r12
219         sbbq    24(%rsi),%r11
220         sbbq    $0,%r13                 # CF = 1 exactly when a + b < p
221
222         cmovcq  %rax,%r8                # borrow: keep a + b; otherwise keep a + b - p
223         cmovcq  %rdx,%r9
224         movq    %r8,0(%rdi)
225         cmovcq  %rcx,%r10
226         movq    %r9,8(%rdi)
227         cmovcq  %r12,%r11
228         movq    %r10,16(%rdi)
229         movq    %r11,24(%rdi)
230
231         popq    %r13
232         popq    %r12
233         .byte   0xf3,0xc3               # repz ret
234 .size   ecp_nistz256_add,.-ecp_nistz256_add
235
236
237
# ecp_nistz256_sub: res = a - b mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a, %rdx -> b
#        (four limbs read from each).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# a - b is computed with the borrow captured in %r13; p is added back and that
# corrected value is kept only when the subtraction borrowed.
238 .globl  ecp_nistz256_sub
239 .type   ecp_nistz256_sub,@function
240 .align  32
241 ecp_nistz256_sub:
242         pushq   %r12
243         pushq   %r13
244
245         movq    0(%rsi),%r8
246         xorq    %r13,%r13
247         movq    8(%rsi),%r9
248         movq    16(%rsi),%r10
249         movq    24(%rsi),%r11
250         leaq    .Lpoly(%rip),%rsi       # a is loaded; rsi now points at p
251
252         subq    0(%rdx),%r8             # r8..r11 = a - b
253         sbbq    8(%rdx),%r9
254         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the raw difference
255         sbbq    16(%rdx),%r10
256         sbbq    24(%rdx),%r11
257         movq    %r9,%rdx                # b has been fully read; rdx is reused as scratch
258         sbbq    $0,%r13                 # r13 = 0 if no borrow, all ones if a < b
259
260         addq    0(%rsi),%r8             # r8..r11 = a - b + p
261         movq    %r10,%rcx
262         adcq    8(%rsi),%r9
263         adcq    16(%rsi),%r10
264         movq    %r11,%r12
265         adcq    24(%rsi),%r11
266         testq   %r13,%r13               # ZF = 1 when the subtraction did not borrow
267
268         cmovzq  %rax,%r8                # no borrow: keep a - b; otherwise keep a - b + p
269         cmovzq  %rdx,%r9
270         movq    %r8,0(%rdi)
271         cmovzq  %rcx,%r10
272         movq    %r9,8(%rdi)
273         cmovzq  %r12,%r11
274         movq    %r10,16(%rdi)
275         movq    %r11,24(%rdi)
276
277         popq    %r13
278         popq    %r12
279         .byte   0xf3,0xc3               # repz ret
280 .size   ecp_nistz256_sub,.-ecp_nistz256_sub
281
282
283
# ecp_nistz256_neg: res = -a mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a (four limbs read).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# Computes 0 - a; when that borrows (a != 0) p is added back, giving p - a.
# For a == 0 there is no borrow and the result stays 0.
284 .globl  ecp_nistz256_neg
285 .type   ecp_nistz256_neg,@function
286 .align  32
287 ecp_nistz256_neg:
288         pushq   %r12
289         pushq   %r13
290
291         xorq    %r8,%r8                 # r8..r11 = 0, r13 = 0
292         xorq    %r9,%r9
293         xorq    %r10,%r10
294         xorq    %r11,%r11
295         xorq    %r13,%r13
296
297         subq    0(%rsi),%r8             # r8..r11 = 0 - a
298         sbbq    8(%rsi),%r9
299         sbbq    16(%rsi),%r10
300         movq    %r8,%rax                # rax,rdx,rcx,r12 keep the raw difference
301         sbbq    24(%rsi),%r11
302         leaq    .Lpoly(%rip),%rsi       # a has been fully read; rsi now points at p
303         movq    %r9,%rdx
304         sbbq    $0,%r13                 # r13 = all ones when the subtraction borrowed
305
306         addq    0(%rsi),%r8             # r8..r11 = p - a
307         movq    %r10,%rcx
308         adcq    8(%rsi),%r9
309         adcq    16(%rsi),%r10
310         movq    %r11,%r12
311         adcq    24(%rsi),%r11
312         testq   %r13,%r13               # ZF = 1 when there was no borrow
313
314         cmovzq  %rax,%r8                # no borrow: keep the raw difference
315         cmovzq  %rdx,%r9
316         movq    %r8,0(%rdi)
317         cmovzq  %rcx,%r10
318         movq    %r9,8(%rdi)
319         cmovzq  %r12,%r11
320         movq    %r10,16(%rdi)
321         movq    %r11,24(%rdi)
322
323         popq    %r13
324         popq    %r12
325         .byte   0xf3,0xc3               # repz ret
326 .size   ecp_nistz256_neg,.-ecp_nistz256_neg
327
328
329
330
# ecp_nistz256_to_mont: Montgomery-multiplies the input by the constant .LRR.
#   In:  %rdi -> res, %rsi -> a. %rdx is overwritten with the address of .LRR
#        and control transfers to the shared body of ecp_nistz256_mul_mont,
#        which performs the multiplication and returns to the caller.
331 .globl  ecp_nistz256_to_mont
332 .type   ecp_nistz256_to_mont,@function
333 .align  32
334 ecp_nistz256_to_mont:
335         leaq    .LRR(%rip),%rdx         # second operand = .LRR
336         jmp     .Lmul_mont              # tail-jump: no frame of its own
337 .size   ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
338
339
340
341
342
343
344
# ecp_nistz256_mul_mont: public wrapper around __ecp_nistz256_mul_montq.
#   In:  %rdi -> res (four 64-bit limbs written by the helper),
#        %rsi -> a, %rdx -> b (four limbs each).
# Saves %rbp, %rbx and %r12-%r15, loads the helper's register inputs
# (%rbx -> b, %rax = b[0], %r9..%r12 = a[0..3]) and calls it.
# .Lmul_mont is also the entry used by ecp_nistz256_to_mont.
345 .globl  ecp_nistz256_mul_mont
346 .type   ecp_nistz256_mul_mont,@function
347 .align  32
348 ecp_nistz256_mul_mont:
349 .Lmul_mont:
350         pushq   %rbp
351         pushq   %rbx
352         pushq   %r12
353         pushq   %r13
354         pushq   %r14
355         pushq   %r15
356         movq    %rdx,%rbx               # rbx -> b (rdx is consumed by mulq in the helper)
357         movq    0(%rdx),%rax            # rax = b[0]
358         movq    0(%rsi),%r9             # r9..r12 = a[0..3]
359         movq    8(%rsi),%r10
360         movq    16(%rsi),%r11
361         movq    24(%rsi),%r12
362
363         call    __ecp_nistz256_mul_montq
364 .Lmul_mont_done:
365         popq    %r15
366         popq    %r14
367         popq    %r13
368         popq    %r12
369         popq    %rbx
370         popq    %rbp
371         .byte   0xf3,0xc3               # repz ret
372 .size   ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
373
# __ecp_nistz256_mul_montq: internal Montgomery multiplication, res = a*b*2^-256 mod p.
# Register interface (not the C ABI):
#   In:  %rdi -> res, %rsi -> a, %rbx -> b, %rax = b[0], %r9..%r12 = a[0..3].
#   Out: result stored to 0..24(%rdi) and also left in %r12,%r13,%r8,%r9
#        (low to high); %r14 = .Lpoly[1], %r15 = .Lpoly[3].
#   Overwrites %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r15.
# Structure: for each limb b[i], add a*b[i] into a five/six-limb accumulator,
# then run one reduction step that cancels the lowest accumulator limb; the
# register holding that limb is recycled as the new top limb. A final
# conditional subtraction of p brings the value below p.
374 .type   __ecp_nistz256_mul_montq,@function
375 .align  32
376 __ecp_nistz256_mul_montq:
377         # acc (r8..r12) = a * b[0]
378
379         movq    %rax,%rbp               # rbp = b[0]
380         mulq    %r9
381         movq    .Lpoly+8(%rip),%r14     # r14 = p[1]
382         movq    %rax,%r8
383         movq    %rbp,%rax
384         movq    %rdx,%r9
385
386         mulq    %r10
387         movq    .Lpoly+24(%rip),%r15    # r15 = p[3]
388         addq    %rax,%r9
389         movq    %rbp,%rax
390         adcq    $0,%rdx
391         movq    %rdx,%r10
392
393         mulq    %r11
394         addq    %rax,%r10
395         movq    %rbp,%rax
396         adcq    $0,%rdx
397         movq    %rdx,%r11
398
399         mulq    %r12
400         addq    %rax,%r11
401         movq    %r8,%rax                # rax = acc[0] for the reduction multiply below
402         adcq    $0,%rdx
403         xorq    %r13,%r13
404         movq    %rdx,%r12
405
406         # Reduction step 1. With p = 2^96 - 1 + p[3]*2^192, adding acc[0]*p
407         # cancels acc[0]; what remains to add is acc[0]<<96 (split across
408         # the next two limbs by the shl/shr by 32) and acc[0]*p[3] two limbs
409         # further up.
410
411
412
413
414
415         movq    %r8,%rbp
416         shlq    $32,%r8
417         mulq    %r15                    # rdx:rax = acc[0] * p[3]
418         shrq    $32,%rbp
419         addq    %r8,%r9
420         adcq    %rbp,%r10
421         adcq    %rax,%r11
422         movq    8(%rbx),%rax            # rax = b[1]
423         adcq    %rdx,%r12
424         adcq    $0,%r13
425         xorq    %r8,%r8                 # r8 is recycled as the new top limb
426
427         # acc (r9..r13,r8) += a * b[1]
428
429         movq    %rax,%rbp
430         mulq    0(%rsi)
431         addq    %rax,%r9
432         movq    %rbp,%rax
433         adcq    $0,%rdx
434         movq    %rdx,%rcx               # rcx carries the high half into the next limb
435
436         mulq    8(%rsi)
437         addq    %rcx,%r10
438         adcq    $0,%rdx
439         addq    %rax,%r10
440         movq    %rbp,%rax
441         adcq    $0,%rdx
442         movq    %rdx,%rcx
443
444         mulq    16(%rsi)
445         addq    %rcx,%r11
446         adcq    $0,%rdx
447         addq    %rax,%r11
448         movq    %rbp,%rax
449         adcq    $0,%rdx
450         movq    %rdx,%rcx
451
452         mulq    24(%rsi)
453         addq    %rcx,%r12
454         adcq    $0,%rdx
455         addq    %rax,%r12
456         movq    %r9,%rax                # rax = lowest accumulator limb for the next reduction
457         adcq    %rdx,%r13
458         adcq    $0,%r8
459
460         # Reduction step 2 (lowest limb is r9)
461
462         movq    %r9,%rbp
463         shlq    $32,%r9
464         mulq    %r15
465         shrq    $32,%rbp
466         addq    %r9,%r10
467         adcq    %rbp,%r11
468         adcq    %rax,%r12
469         movq    16(%rbx),%rax           # rax = b[2]
470         adcq    %rdx,%r13
471         adcq    $0,%r8
472         xorq    %r9,%r9                 # r9 is recycled as the new top limb
473
474         # acc (r10..r13,r8,r9) += a * b[2]
475
476         movq    %rax,%rbp
477         mulq    0(%rsi)
478         addq    %rax,%r10
479         movq    %rbp,%rax
480         adcq    $0,%rdx
481         movq    %rdx,%rcx
482
483         mulq    8(%rsi)
484         addq    %rcx,%r11
485         adcq    $0,%rdx
486         addq    %rax,%r11
487         movq    %rbp,%rax
488         adcq    $0,%rdx
489         movq    %rdx,%rcx
490
491         mulq    16(%rsi)
492         addq    %rcx,%r12
493         adcq    $0,%rdx
494         addq    %rax,%r12
495         movq    %rbp,%rax
496         adcq    $0,%rdx
497         movq    %rdx,%rcx
498
499         mulq    24(%rsi)
500         addq    %rcx,%r13
501         adcq    $0,%rdx
502         addq    %rax,%r13
503         movq    %r10,%rax
504         adcq    %rdx,%r8
505         adcq    $0,%r9
506
507         # Reduction step 3 (lowest limb is r10)
508
509         movq    %r10,%rbp
510         shlq    $32,%r10
511         mulq    %r15
512         shrq    $32,%rbp
513         addq    %r10,%r11
514         adcq    %rbp,%r12
515         adcq    %rax,%r13
516         movq    24(%rbx),%rax           # rax = b[3]
517         adcq    %rdx,%r8
518         adcq    $0,%r9
519         xorq    %r10,%r10               # r10 is recycled as the new top limb
520
521         # acc (r11..r13,r8,r9,r10) += a * b[3]
522
523         movq    %rax,%rbp
524         mulq    0(%rsi)
525         addq    %rax,%r11
526         movq    %rbp,%rax
527         adcq    $0,%rdx
528         movq    %rdx,%rcx
529
530         mulq    8(%rsi)
531         addq    %rcx,%r12
532         adcq    $0,%rdx
533         addq    %rax,%r12
534         movq    %rbp,%rax
535         adcq    $0,%rdx
536         movq    %rdx,%rcx
537
538         mulq    16(%rsi)
539         addq    %rcx,%r13
540         adcq    $0,%rdx
541         addq    %rax,%r13
542         movq    %rbp,%rax
543         adcq    $0,%rdx
544         movq    %rdx,%rcx
545
546         mulq    24(%rsi)
547         addq    %rcx,%r8
548         adcq    $0,%rdx
549         addq    %rax,%r8
550         movq    %r11,%rax
551         adcq    %rdx,%r9
552         adcq    $0,%r10
553
554         # Reduction step 4 (lowest limb is r11); rcx,rbp,rbx,rdx capture the
555         # unsubtracted result limbs for the final selection
556         movq    %r11,%rbp
557         shlq    $32,%r11
558         mulq    %r15
559         shrq    $32,%rbp
560         addq    %r11,%r12
561         adcq    %rbp,%r13
562         movq    %r12,%rcx
563         adcq    %rax,%r8
564         adcq    %rdx,%r9
565         movq    %r13,%rbp
566         adcq    $0,%r10                 # r10 = top (fifth) limb of the reduced value
567
568         # Final step: subtract p from r10:r9:r8:r13:r12 and undo it on borrow
569
570         subq    $-1,%r12                # $-1 is p[0]
571         movq    %r8,%rbx                # rbx is no longer needed as the b pointer
572         sbbq    %r14,%r13
573         sbbq    $0,%r8                  # $0 is p[2]
574         movq    %r9,%rdx
575         sbbq    %r15,%r9
576         sbbq    $0,%r10                 # CF = 1 when the value was already below p
577
578         cmovcq  %rcx,%r12               # borrow: restore the unsubtracted limbs
579         cmovcq  %rbp,%r13
580         movq    %r12,0(%rdi)
581         cmovcq  %rbx,%r8
582         movq    %r13,8(%rdi)
583         cmovcq  %rdx,%r9
584         movq    %r8,16(%rdi)
585         movq    %r9,24(%rdi)
586
587         .byte   0xf3,0xc3               # repz ret
588 .size   __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
589
590
591
592
593
594
595
596
# ecp_nistz256_sqr_mont: public wrapper around __ecp_nistz256_sqr_montq.
#   In:  %rdi -> res (four 64-bit limbs written by the helper), %rsi -> a.
# Saves %rbp, %rbx and %r12-%r15, loads the helper's register inputs
# (%rax = a[0], %r14 = a[1], %r15 = a[2], %r8 = a[3]) and calls it.
597 .globl  ecp_nistz256_sqr_mont
598 .type   ecp_nistz256_sqr_mont,@function
599 .align  32
600 ecp_nistz256_sqr_mont:
601         pushq   %rbp
602         pushq   %rbx
603         pushq   %r12
604         pushq   %r13
605         pushq   %r14
606         pushq   %r15
607         movq    0(%rsi),%rax            # rax = a[0]
608         movq    8(%rsi),%r14            # r14 = a[1]
609         movq    16(%rsi),%r15           # r15 = a[2]
610         movq    24(%rsi),%r8            # r8  = a[3]
611
612         call    __ecp_nistz256_sqr_montq
613 .Lsqr_mont_done:
614         popq    %r15
615         popq    %r14
616         popq    %r13
617         popq    %r12
618         popq    %rbx
619         popq    %rbp
620         .byte   0xf3,0xc3               # repz ret
621 .size   ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
622
# __ecp_nistz256_sqr_montq: internal Montgomery squaring, res = a*a*2^-256 mod p.
# Register interface (not the C ABI):
#   In:  %rdi -> res, %rsi -> a, %rax = a[0], %r14 = a[1], %r15 = a[2], %r8 = a[3].
#   Out: result stored to 0..24(%rdi) and also left in %r12,%r13,%r14,%r15
#        (low to high); %rsi = .Lpoly[1], %rbp = .Lpoly[3].
#   Overwrites %rax, %rcx, %rdx, %rbp, %rsi, %r8-%r15.
# Structure: sum the six cross products a[i]*a[j] (i < j), double that sum,
# add the four squares a[i]^2 to obtain the 512-bit square in r8..r15, run
# four reduction steps on the low half, add the high half, then subtract p
# conditionally.
623 .type   __ecp_nistz256_sqr_montq,@function
624 .align  32
625 __ecp_nistz256_sqr_montq:
626         movq    %rax,%r13               # r13 = a[0]
627         mulq    %r14                    # a[0]*a[1]
628         movq    %rax,%r9
629         movq    %r15,%rax
630         movq    %rdx,%r10
631
632         mulq    %r13                    # a[2]*a[0]
633         addq    %rax,%r10
634         movq    %r8,%rax
635         adcq    $0,%rdx
636         movq    %rdx,%r11
637
638         mulq    %r13                    # a[3]*a[0]
639         addq    %rax,%r11
640         movq    %r15,%rax
641         adcq    $0,%rdx
642         movq    %rdx,%r12
643
644
645         mulq    %r14                    # a[2]*a[1]
646         addq    %rax,%r11
647         movq    %r8,%rax
648         adcq    $0,%rdx
649         movq    %rdx,%rbp
650
651         mulq    %r14                    # a[3]*a[1]
652         addq    %rax,%r12
653         movq    %r8,%rax
654         adcq    $0,%rdx
655         addq    %rbp,%r12
656         movq    %rdx,%r13
657         adcq    $0,%r13
658
659
660         mulq    %r15                    # a[3]*a[2]
661         xorq    %r15,%r15
662         addq    %rax,%r13
663         movq    0(%rsi),%rax
664         movq    %rdx,%r14
665         adcq    $0,%r14
666
667         addq    %r9,%r9                 # double the cross-product sum; the carry out lands in r15
668         adcq    %r10,%r10
669         adcq    %r11,%r11
670         adcq    %r12,%r12
671         adcq    %r13,%r13
672         adcq    %r14,%r14
673         adcq    $0,%r15
674
675         mulq    %rax                    # a[0]^2
676         movq    %rax,%r8
677         movq    8(%rsi),%rax
678         movq    %rdx,%rcx
679
680         mulq    %rax                    # a[1]^2
681         addq    %rcx,%r9
682         adcq    %rax,%r10
683         movq    16(%rsi),%rax
684         adcq    $0,%rdx
685         movq    %rdx,%rcx
686
687         mulq    %rax                    # a[2]^2
688         addq    %rcx,%r11
689         adcq    %rax,%r12
690         movq    24(%rsi),%rax
691         adcq    $0,%rdx
692         movq    %rdx,%rcx
693
694         mulq    %rax                    # a[3]^2
695         addq    %rcx,%r13
696         adcq    %rax,%r14
697         movq    %r8,%rax
698         adcq    %rdx,%r15
699
700         movq    .Lpoly+8(%rip),%rsi     # rsi = p[1]; a is not read again
701         movq    .Lpoly+24(%rip),%rbp    # rbp = p[3]
702
703         # Reduction step 1: cancel r8 by adding r8<<96 and r8*p[3]; the
704         # leftover carry in rdx becomes the next step's fourth limb
705
706         movq    %r8,%rcx
707         shlq    $32,%r8
708         mulq    %rbp
709         shrq    $32,%rcx
710         addq    %r8,%r9
711         adcq    %rcx,%r10
712         adcq    %rax,%r11
713         movq    %r9,%rax
714         adcq    $0,%rdx
715
716         # Reduction step 2 (lowest limb is r9)
717
718         movq    %r9,%rcx
719         shlq    $32,%r9
720         movq    %rdx,%r8
721         mulq    %rbp
722         shrq    $32,%rcx
723         addq    %r9,%r10
724         adcq    %rcx,%r11
725         adcq    %rax,%r8
726         movq    %r10,%rax
727         adcq    $0,%rdx
728
729         # Reduction step 3 (lowest limb is r10)
730
731         movq    %r10,%rcx
732         shlq    $32,%r10
733         movq    %rdx,%r9
734         mulq    %rbp
735         shrq    $32,%rcx
736         addq    %r10,%r11
737         adcq    %rcx,%r8
738         adcq    %rax,%r9
739         movq    %r11,%rax
740         adcq    $0,%rdx
741
742         # Reduction step 4 (lowest limb is r11)
743
744         movq    %r11,%rcx
745         shlq    $32,%r11
746         movq    %rdx,%r10
747         mulq    %rbp
748         shrq    $32,%rcx
749         addq    %r11,%r8
750         adcq    %rcx,%r9
751         adcq    %rax,%r10
752         adcq    $0,%rdx
753         xorq    %r11,%r11
754
755         # Add the reduced low half (r8,r9,r10,rdx) to the high half r12..r15;
756         # r11 collects the carry
757         addq    %r8,%r12
758         adcq    %r9,%r13
759         movq    %r12,%r8                # r8,r9,r10,rcx keep the unsubtracted limbs
760         adcq    %r10,%r14
761         adcq    %rdx,%r15
762         movq    %r13,%r9
763         adcq    $0,%r11
764
765         subq    $-1,%r12                # subtract p ($-1 = p[0], $0 = p[2])
766         movq    %r14,%r10
767         sbbq    %rsi,%r13
768         sbbq    $0,%r14
769         movq    %r15,%rcx
770         sbbq    %rbp,%r15
771         sbbq    $0,%r11                 # CF = 1 when the value was already below p
772
773         cmovcq  %r8,%r12                # borrow: restore the unsubtracted limbs
774         cmovcq  %r9,%r13
775         movq    %r12,0(%rdi)
776         cmovcq  %r10,%r14
777         movq    %r13,8(%rdi)
778         cmovcq  %rcx,%r15
779         movq    %r14,16(%rdi)
780         movq    %r15,24(%rdi)
781
782         .byte   0xf3,0xc3               # repz ret
783 .size   __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
784
785
786
787
788
789
# ecp_nistz256_from_mont: res = a * 2^-256 mod p.
#   In:  %rdi -> res (four 64-bit limbs written), %rsi -> a (four limbs read).
#   Scratch: %rax, %rcx, %rdx, %rsi, %r8-%r11; %r12/%r13 are saved and restored.
# Runs the same four reduction steps as the Montgomery multiply, but on the
# input alone (no multiplication), then subtracts p conditionally.
790 .globl  ecp_nistz256_from_mont
791 .type   ecp_nistz256_from_mont,@function
792 .align  32
793 ecp_nistz256_from_mont:
794         pushq   %r12
795         pushq   %r13
796
797         movq    0(%rsi),%rax
798         movq    .Lpoly+24(%rip),%r13    # r13 = p[3]
799         movq    8(%rsi),%r9
800         movq    16(%rsi),%r10
801         movq    24(%rsi),%r11
802         movq    %rax,%r8
803         movq    .Lpoly+8(%rip),%r12     # r12 = p[1]
804
805         # Reduction step 1: cancel the lowest limb by adding limb<<96 and
806         # limb*p[3]; the leftover carry in rdx becomes the next step's fourth limb
807         movq    %rax,%rcx
808         shlq    $32,%r8
809         mulq    %r13
810         shrq    $32,%rcx
811         addq    %r8,%r9
812         adcq    %rcx,%r10
813         adcq    %rax,%r11
814         movq    %r9,%rax
815         adcq    $0,%rdx
816
817         # Reduction step 2 (lowest limb is r9)
818
819         movq    %r9,%rcx
820         shlq    $32,%r9
821         movq    %rdx,%r8
822         mulq    %r13
823         shrq    $32,%rcx
824         addq    %r9,%r10
825         adcq    %rcx,%r11
826         adcq    %rax,%r8
827         movq    %r10,%rax
828         adcq    $0,%rdx
829
830         # Reduction step 3 (lowest limb is r10)
831
832         movq    %r10,%rcx
833         shlq    $32,%r10
834         movq    %rdx,%r9
835         mulq    %r13
836         shrq    $32,%rcx
837         addq    %r10,%r11
838         adcq    %rcx,%r8
839         adcq    %rax,%r9
840         movq    %r11,%rax
841         adcq    $0,%rdx
842
843         # Reduction step 4 (lowest limb is r11); rcx and rsi capture the two
844         # low result limbs before the subtraction
845         movq    %r11,%rcx
846         shlq    $32,%r11
847         movq    %rdx,%r10
848         mulq    %r13
849         shrq    $32,%rcx
850         addq    %r11,%r8
851         adcq    %rcx,%r9
852         movq    %r8,%rcx
853         adcq    %rax,%r10
854         movq    %r9,%rsi                # a has been fully read; rsi is reused as scratch
855         adcq    $0,%rdx
856
857         # Value is now rdx:r10:r9:r8; subtract p and undo on borrow
858
859         subq    $-1,%r8                 # $-1 = p[0]
860         movq    %r10,%rax
861         sbbq    %r12,%r9
862         sbbq    $0,%r10                 # $0 = p[2]
863         movq    %rdx,%r11               # r11 = unsubtracted top limb
864         sbbq    %r13,%rdx
865         sbbq    %r13,%r13               # r13 = all ones on borrow, 0 otherwise; ZF reflects that
866
867         cmovnzq %rcx,%r8                # borrow: restore the unsubtracted limbs
868         cmovnzq %rsi,%r9
869         movq    %r8,0(%rdi)
870         cmovnzq %rax,%r10
871         movq    %r9,8(%rdi)
872         cmovzq  %rdx,%r11               # no borrow: take the subtracted top limb
873         movq    %r10,16(%rdi)
874         movq    %r11,24(%rdi)
875
876         popq    %r13
877         popq    %r12
878         .byte   0xf3,0xc3               # repz ret
879 .size   ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
880
881
# ecp_nistz256_select_w5: copy one 96-byte entry out of a 16-entry table.
#   In:  %rdi -> output (96 bytes, written with unaligned stores),
#        %rsi -> table (16 entries of 96 bytes, read with aligned 16-byte loads),
#        %edx = index; entries are numbered from 1.
# Every entry is loaded on every call; a per-entry all-ones/all-zeros mask
# (counter == index) decides whether it is OR-ed into the accumulators. An
# index that matches no counter value (0 or above 16) yields 96 zero bytes.
# Uses %xmm0-%xmm15 and %rax.
882 .globl  ecp_nistz256_select_w5
883 .type   ecp_nistz256_select_w5,@function
884 .align  32
885 ecp_nistz256_select_w5:
886         movdqa  .LOne(%rip),%xmm0       # xmm0 = counter step (1 in each lane)
887         movd    %edx,%xmm1
888
889         pxor    %xmm2,%xmm2             # xmm2..xmm7 = result accumulators, start at zero
890         pxor    %xmm3,%xmm3
891         pxor    %xmm4,%xmm4
892         pxor    %xmm5,%xmm5
893         pxor    %xmm6,%xmm6
894         pxor    %xmm7,%xmm7
895
896         movdqa  %xmm0,%xmm8             # xmm8 = running counter, starts at 1
897         pshufd  $0,%xmm1,%xmm1          # broadcast index to all four lanes
898
899         movq    $16,%rax                # number of table entries
900 .Lselect_loop_sse_w5:
901
902         movdqa  %xmm8,%xmm15
903         paddd   %xmm0,%xmm8
904         pcmpeqd %xmm1,%xmm15            # xmm15 = all ones if this entry's number equals index
905
906         movdqa  0(%rsi),%xmm9
907         movdqa  16(%rsi),%xmm10
908         movdqa  32(%rsi),%xmm11
909         movdqa  48(%rsi),%xmm12
910         movdqa  64(%rsi),%xmm13
911         movdqa  80(%rsi),%xmm14
912         leaq    96(%rsi),%rsi           # advance to the next entry
913
914         pand    %xmm15,%xmm9
915         pand    %xmm15,%xmm10
916         por     %xmm9,%xmm2
917         pand    %xmm15,%xmm11
918         por     %xmm10,%xmm3
919         pand    %xmm15,%xmm12
920         por     %xmm11,%xmm4
921         pand    %xmm15,%xmm13
922         por     %xmm12,%xmm5
923         pand    %xmm15,%xmm14
924         por     %xmm13,%xmm6
925         por     %xmm14,%xmm7
926
927         decq    %rax
928         jnz     .Lselect_loop_sse_w5
929
930         movdqu  %xmm2,0(%rdi)
931         movdqu  %xmm3,16(%rdi)
932         movdqu  %xmm4,32(%rdi)
933         movdqu  %xmm5,48(%rdi)
934         movdqu  %xmm6,64(%rdi)
935         movdqu  %xmm7,80(%rdi)
936         .byte   0xf3,0xc3               # repz ret
937 .size   ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
938
939
940
# ecp_nistz256_select_w7: copy one 64-byte entry out of a 64-entry table.
#   In:  %rdi -> output (64 bytes, written with unaligned stores),
#        %rsi -> table (64 entries of 64 bytes, read with aligned 16-byte loads),
#        %edx = index; entries are numbered from 1.
# Same masking scheme as ecp_nistz256_select_w5: every entry is loaded and
# AND-ed with a (counter == index) mask before being OR-ed into the result.
# An index that matches no counter value yields 64 zero bytes.
# Uses %xmm0-%xmm5, %xmm8-%xmm12, %xmm15 and %rax.
941 .globl  ecp_nistz256_select_w7
942 .type   ecp_nistz256_select_w7,@function
943 .align  32
944 ecp_nistz256_select_w7:
945         movdqa  .LOne(%rip),%xmm8       # xmm8 = running counter, starts at 1
946         movd    %edx,%xmm1
947
948         pxor    %xmm2,%xmm2             # xmm2..xmm5 = result accumulators
949         pxor    %xmm3,%xmm3
950         pxor    %xmm4,%xmm4
951         pxor    %xmm5,%xmm5
952
953         movdqa  %xmm8,%xmm0             # xmm0 = counter step (1 in each lane)
954         pshufd  $0,%xmm1,%xmm1          # broadcast index to all four lanes
955         movq    $64,%rax                # number of table entries
956
957 .Lselect_loop_sse_w7:
958         movdqa  %xmm8,%xmm15
959         paddd   %xmm0,%xmm8
960         movdqa  0(%rsi),%xmm9
961         movdqa  16(%rsi),%xmm10
962         pcmpeqd %xmm1,%xmm15            # xmm15 = all ones if this entry's number equals index
963         movdqa  32(%rsi),%xmm11
964         movdqa  48(%rsi),%xmm12
965         leaq    64(%rsi),%rsi           # advance to the next entry
966
967         pand    %xmm15,%xmm9
968         pand    %xmm15,%xmm10
969         por     %xmm9,%xmm2
970         pand    %xmm15,%xmm11
971         por     %xmm10,%xmm3
972         pand    %xmm15,%xmm12
973         por     %xmm11,%xmm4
974         prefetcht0      255(%rsi)       # prefetch hint for data ahead of the current position
975         por     %xmm12,%xmm5
976
977         decq    %rax
978         jnz     .Lselect_loop_sse_w7
979
980         movdqu  %xmm2,0(%rdi)
981         movdqu  %xmm3,16(%rdi)
982         movdqu  %xmm4,32(%rdi)
983         movdqu  %xmm5,48(%rdi)
984         .byte   0xf3,0xc3               # repz ret
985 .size   ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
# ecp_nistz256_avx2_select_w7: placeholder with no implementation in this
# build. The first two bytes encode ud2, so calling it raises an
# invalid-opcode exception; the return encoding after it is never reached.
986 .globl  ecp_nistz256_avx2_select_w7
987 .type   ecp_nistz256_avx2_select_w7,@function
988 .align  32
989 ecp_nistz256_avx2_select_w7:
990 .byte   0x0f,0x0b                       # ud2
991         .byte   0xf3,0xc3               # repz ret
992 .size   ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
# __ecp_nistz256_add_toq: internal helper, res = a + b mod p.
# Register interface (not the C ABI):
#   In:  %r12,%r13,%r8,%r9 = a (low to high), %rbx -> b, %rdi -> res,
#        %r14 = .Lpoly[1], %r15 = .Lpoly[3] (set up by the caller).
#   Out: result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
#   Overwrites %rax, %rbp, %rcx, %r10 (copies of the unreduced sum) and %r11.
# The sum is kept as a 257-bit value (%r11 holds bit 256) and p is subtracted
# across all 257 bits; the resulting borrow selects between a+b and a+b-p, so
# the output is fully reduced whenever both inputs are below p.
993 .type   __ecp_nistz256_add_toq,@function
994 .align  32
995 __ecp_nistz256_add_toq:
996         xorq    %r11,%r11               # bit 256 of the sum; cleared before the carry chain starts
997         addq    0(%rbx),%r12            # r12,r13,r8,r9 = a + b
998         adcq    8(%rbx),%r13
999         movq    %r12,%rax
1000         adcq    16(%rbx),%r8
1001         adcq    24(%rbx),%r9
1002         movq    %r13,%rbp
1003         adcq    $0,%r11
1004         subq    $-1,%r12                # subtract p ($-1 = p[0], $0 = p[2])
1005         movq    %r8,%rcx
1006         sbbq    %r14,%r13
1007         sbbq    $0,%r8
1008         movq    %r9,%r10
1009         sbbq    %r15,%r9
1010         sbbq    $0,%r11                 # CF = 1 exactly when a + b < p
1011
1012         cmovcq  %rax,%r12               # borrow: keep a + b; otherwise keep a + b - p
1013         cmovcq  %rbp,%r13
1014         movq    %r12,0(%rdi)
1015         cmovcq  %rcx,%r8
1016         movq    %r13,8(%rdi)
1017         cmovcq  %r10,%r9
1018         movq    %r8,16(%rdi)
1019         movq    %r9,24(%rdi)
1020
1021         .byte   0xf3,0xc3               # repz ret
1022 .size   __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
1023
# __ecp_nistz256_sub_fromq: internal helper, res = a - b mod p.
# Register interface (not the C ABI):
#   In:  %r12,%r13,%r8,%r9 = a (low to high), %rbx -> b, %rdi -> res,
#        %r14 and %r15 = the values added as limbs 1 and 3 of the modulus
#        (callers load them from .Lpoly+8 and .Lpoly+24).
#   Out: result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
#   Overwrites %rax, %rbp, %rcx, %r10 (copies of the raw difference) and %r11.
1024 .type   __ecp_nistz256_sub_fromq,@function
1025 .align  32
1026 __ecp_nistz256_sub_fromq:
1027         subq    0(%rbx),%r12            # r12,r13,r8,r9 = a - b
1028         sbbq    8(%rbx),%r13
1029         movq    %r12,%rax
1030         sbbq    16(%rbx),%r8
1031         sbbq    24(%rbx),%r9
1032         movq    %r13,%rbp
1033         sbbq    %r11,%r11               # r11 = all ones if the subtraction borrowed, else 0
1034
1035         addq    $-1,%r12                # add p back ($-1 = p[0], $0 = p[2])
1036         movq    %r8,%rcx
1037         adcq    %r14,%r13
1038         adcq    $0,%r8
1039         movq    %r9,%r10
1040         adcq    %r15,%r9
1041         testq   %r11,%r11               # ZF = 1 when there was no borrow
1042
1043         cmovzq  %rax,%r12               # no borrow: keep a - b; otherwise keep a - b + p
1044         cmovzq  %rbp,%r13
1045         movq    %r12,0(%rdi)
1046         cmovzq  %rcx,%r8
1047         movq    %r13,8(%rdi)
1048         cmovzq  %r10,%r9
1049         movq    %r8,16(%rdi)
1050         movq    %r9,24(%rdi)
1051
1052         .byte   0xf3,0xc3               # repz ret
1053 .size   __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
1054
# __ecp_nistz256_subq: internal helper, registers-only subtraction modulo p.
# Register interface (not the C ABI):
#   In:  %rax,%rbp,%rcx,%r10 = minuend (low to high),
#        %r12,%r13,%r8,%r9   = subtrahend (low to high),
#        %r14 and %r15 = the values added as limbs 1 and 3 of the modulus.
#   Out: %r12,%r13,%r8,%r9 = minuend - subtrahend mod p. Nothing is stored to
#        memory. %rax,%rbp,%rcx,%r10 end up holding the difference plus p,
#        and %r11 the borrow mask.
1055 .type   __ecp_nistz256_subq,@function
1056 .align  32
1057 __ecp_nistz256_subq:
1058         subq    %r12,%rax               # rax,rbp,rcx,r10 = minuend - subtrahend
1059         sbbq    %r13,%rbp
1060         movq    %rax,%r12               # r12,r13,r8,r9 take the raw difference
1061         sbbq    %r8,%rcx
1062         sbbq    %r9,%r10
1063         movq    %rbp,%r13
1064         sbbq    %r11,%r11               # r11 = all ones if the subtraction borrowed, else 0
1065
1066         addq    $-1,%rax                # rax,rbp,rcx,r10 = difference + p ($-1 = p[0], $0 = p[2])
1067         movq    %rcx,%r8
1068         adcq    %r14,%rbp
1069         adcq    $0,%rcx
1070         movq    %r10,%r9
1071         adcq    %r15,%r10
1072         testq   %r11,%r11               # ZF = 0 when there was a borrow
1073
1074         cmovnzq %rax,%r12               # borrow: take difference + p
1075         cmovnzq %rbp,%r13
1076         cmovnzq %rcx,%r8
1077         cmovnzq %r10,%r9
1078
1079         .byte   0xf3,0xc3               # repz ret
1080 .size   __ecp_nistz256_subq,.-__ecp_nistz256_subq
1081
# __ecp_nistz256_mul_by_2q: internal helper, res = 2*a mod p.
# Register interface (not the C ABI):
#   In:  %r12,%r13,%r8,%r9 = a (low to high), %rdi -> res,
#        %r14 = .Lpoly[1], %r15 = .Lpoly[3] (set up by the caller).
#   Out: result stored to 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
#   Overwrites %rax, %rbp, %rcx, %r10 (copies of the unreduced 2a) and %r11.
# The doubled value is kept as a 257-bit quantity (%r11 holds bit 256) and p
# is subtracted across all 257 bits; the resulting borrow selects between 2a
# and 2a-p, so the output is fully reduced whenever a < p.
1082 .type   __ecp_nistz256_mul_by_2q,@function
1083 .align  32
1084 __ecp_nistz256_mul_by_2q:
1085         xorq    %r11,%r11               # bit 256 of 2a; cleared before the carry chain starts
1086         addq    %r12,%r12               # r12,r13,r8,r9 = a + a
1087         adcq    %r13,%r13
1088         movq    %r12,%rax
1089         adcq    %r8,%r8
1090         adcq    %r9,%r9
1091         movq    %r13,%rbp
1092         adcq    $0,%r11
1093         subq    $-1,%r12                # subtract p ($-1 = p[0], $0 = p[2])
1094         movq    %r8,%rcx
1095         sbbq    %r14,%r13
1096         sbbq    $0,%r8
1097         movq    %r9,%r10
1098         sbbq    %r15,%r9
1099         sbbq    $0,%r11                 # CF = 1 exactly when 2a < p
1100
1101         cmovcq  %rax,%r12               # borrow: keep 2a; otherwise keep 2a - p
1102         cmovcq  %rbp,%r13
1103         movq    %r12,0(%rdi)
1104         cmovcq  %rcx,%r8
1105         movq    %r13,8(%rdi)
1106         cmovcq  %r10,%r9
1107         movq    %r8,16(%rdi)
1108         movq    %r9,24(%rdi)
1109
1110         .byte   0xf3,0xc3               # repz ret
1111 .size   __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
1112 .globl  ecp_nistz256_point_double
1113 .type   ecp_nistz256_point_double,@function
1114 .align  32
1115 ecp_nistz256_point_double:
1116         pushq   %rbp
1117         pushq   %rbx
1118         pushq   %r12
1119         pushq   %r13
1120         pushq   %r14
1121         pushq   %r15
1122         subq    $160+8,%rsp
1123
1124 .Lpoint_double_shortcutq:
1125         movdqu  0(%rsi),%xmm0
1126         movq    %rsi,%rbx
1127         movdqu  16(%rsi),%xmm1
1128         movq    32+0(%rsi),%r12
1129         movq    32+8(%rsi),%r13
1130         movq    32+16(%rsi),%r8
1131         movq    32+24(%rsi),%r9
1132         movq    .Lpoly+8(%rip),%r14
1133         movq    .Lpoly+24(%rip),%r15
1134         movdqa  %xmm0,96(%rsp)
1135         movdqa  %xmm1,96+16(%rsp)
1136         leaq    32(%rdi),%r10
1137         leaq    64(%rdi),%r11
1138 .byte   102,72,15,110,199
1139 .byte   102,73,15,110,202
1140 .byte   102,73,15,110,211
1141
1142         leaq    0(%rsp),%rdi
1143         call    __ecp_nistz256_mul_by_2q
1144
1145         movq    64+0(%rsi),%rax
1146         movq    64+8(%rsi),%r14
1147         movq    64+16(%rsi),%r15
1148         movq    64+24(%rsi),%r8
1149         leaq    64-0(%rsi),%rsi
1150         leaq    64(%rsp),%rdi
1151         call    __ecp_nistz256_sqr_montq
1152
1153         movq    0+0(%rsp),%rax
1154         movq    8+0(%rsp),%r14
1155         leaq    0+0(%rsp),%rsi
1156         movq    16+0(%rsp),%r15
1157         movq    24+0(%rsp),%r8
1158         leaq    0(%rsp),%rdi
1159         call    __ecp_nistz256_sqr_montq
1160
1161         movq    32(%rbx),%rax
1162         movq    64+0(%rbx),%r9
1163         movq    64+8(%rbx),%r10
1164         movq    64+16(%rbx),%r11
1165         movq    64+24(%rbx),%r12
1166         leaq    64-0(%rbx),%rsi
1167         leaq    32(%rbx),%rbx
1168 .byte   102,72,15,126,215
1169         call    __ecp_nistz256_mul_montq
1170         call    __ecp_nistz256_mul_by_2q
1171
1172         movq    96+0(%rsp),%r12
1173         movq    96+8(%rsp),%r13
1174         leaq    64(%rsp),%rbx
1175         movq    96+16(%rsp),%r8
1176         movq    96+24(%rsp),%r9
1177         leaq    32(%rsp),%rdi
1178         call    __ecp_nistz256_add_toq
1179
1180         movq    96+0(%rsp),%r12
1181         movq    96+8(%rsp),%r13
1182         leaq    64(%rsp),%rbx
1183         movq    96+16(%rsp),%r8
1184         movq    96+24(%rsp),%r9
1185         leaq    64(%rsp),%rdi
1186         call    __ecp_nistz256_sub_fromq
1187
1188         movq    0+0(%rsp),%rax
1189         movq    8+0(%rsp),%r14
1190         leaq    0+0(%rsp),%rsi
1191         movq    16+0(%rsp),%r15
1192         movq    24+0(%rsp),%r8
1193 .byte   102,72,15,126,207
1194         call    __ecp_nistz256_sqr_montq
1195         xorq    %r9,%r9
1196         movq    %r12,%rax
1197         addq    $-1,%r12
1198         movq    %r13,%r10
1199         adcq    %rsi,%r13
1200         movq    %r14,%rcx
1201         adcq    $0,%r14
1202         movq    %r15,%r8
1203         adcq    %rbp,%r15
1204         adcq    $0,%r9
1205         xorq    %rsi,%rsi
1206         testq   $1,%rax
1207
1208         cmovzq  %rax,%r12
1209         cmovzq  %r10,%r13
1210         cmovzq  %rcx,%r14
1211         cmovzq  %r8,%r15
1212         cmovzq  %rsi,%r9
1213
1214         movq    %r13,%rax
1215         shrq    $1,%r12
1216         shlq    $63,%rax
1217         movq    %r14,%r10
1218         shrq    $1,%r13
1219         orq     %rax,%r12
1220         shlq    $63,%r10
1221         movq    %r15,%rcx
1222         shrq    $1,%r14
1223         orq     %r10,%r13
1224         shlq    $63,%rcx
1225         movq    %r12,0(%rdi)
1226         shrq    $1,%r15
1227         movq    %r13,8(%rdi)
1228         shlq    $63,%r9
1229         orq     %rcx,%r14
1230         orq     %r9,%r15
1231         movq    %r14,16(%rdi)
1232         movq    %r15,24(%rdi)
1233         movq    64(%rsp),%rax
1234         leaq    64(%rsp),%rbx
1235         movq    0+32(%rsp),%r9
1236         movq    8+32(%rsp),%r10
1237         leaq    0+32(%rsp),%rsi
1238         movq    16+32(%rsp),%r11
1239         movq    24+32(%rsp),%r12
1240         leaq    32(%rsp),%rdi
1241         call    __ecp_nistz256_mul_montq
1242
1243         leaq    128(%rsp),%rdi
1244         call    __ecp_nistz256_mul_by_2q
1245
1246         leaq    32(%rsp),%rbx
1247         leaq    32(%rsp),%rdi
1248         call    __ecp_nistz256_add_toq
1249
1250         movq    96(%rsp),%rax
1251         leaq    96(%rsp),%rbx
1252         movq    0+0(%rsp),%r9
1253         movq    8+0(%rsp),%r10
1254         leaq    0+0(%rsp),%rsi
1255         movq    16+0(%rsp),%r11
1256         movq    24+0(%rsp),%r12
1257         leaq    0(%rsp),%rdi
1258         call    __ecp_nistz256_mul_montq
1259
1260         leaq    128(%rsp),%rdi
1261         call    __ecp_nistz256_mul_by_2q
1262
1263         movq    0+32(%rsp),%rax
1264         movq    8+32(%rsp),%r14
1265         leaq    0+32(%rsp),%rsi
1266         movq    16+32(%rsp),%r15
1267         movq    24+32(%rsp),%r8
1268 .byte   102,72,15,126,199
1269         call    __ecp_nistz256_sqr_montq
1270
1271         leaq    128(%rsp),%rbx
1272         movq    %r14,%r8
1273         movq    %r15,%r9
1274         movq    %rsi,%r14
1275         movq    %rbp,%r15
1276         call    __ecp_nistz256_sub_fromq
1277
1278         movq    0+0(%rsp),%rax
1279         movq    0+8(%rsp),%rbp
1280         movq    0+16(%rsp),%rcx
1281         movq    0+24(%rsp),%r10
1282         leaq    0(%rsp),%rdi
1283         call    __ecp_nistz256_subq
1284
1285         movq    32(%rsp),%rax
1286         leaq    32(%rsp),%rbx
1287         movq    %r12,%r14
1288         xorl    %ecx,%ecx
1289         movq    %r12,0+0(%rsp)
1290         movq    %r13,%r10
1291         movq    %r13,0+8(%rsp)
1292         cmovzq  %r8,%r11
1293         movq    %r8,0+16(%rsp)
1294         leaq    0-0(%rsp),%rsi
1295         cmovzq  %r9,%r12
1296         movq    %r9,0+24(%rsp)
1297         movq    %r14,%r9
1298         leaq    0(%rsp),%rdi
1299         call    __ecp_nistz256_mul_montq
1300
1301 .byte   102,72,15,126,203
1302 .byte   102,72,15,126,207
1303         call    __ecp_nistz256_sub_fromq
1304
1305         addq    $160+8,%rsp
1306         popq    %r15
1307         popq    %r14
1308         popq    %r13
1309         popq    %r12
1310         popq    %rbx
1311         popq    %rbp
1312         .byte   0xf3,0xc3
1313 .size   ecp_nistz256_point_double,.-ecp_nistz256_point_double
# =====================================================================
# ecp_nistz256_point_add
#
# Register use on entry, as read by the code below:
#   %rdi  output buffer; 96 bytes are written on the paths that return here
#   %rsi  first input;  96 bytes are read
#   %rdx  second input; 96 bytes are read
# Each operand is handled as three 32-byte fields at offsets 0, 32 and 64
# (presumably the X, Y and Z coordinates of a point -- confirm against the
# C prototype).
#
# Notation: [N] is the 32-byte stack slot at N(%rsp).
#   [384] [416] [448]   copy of the first input's three fields
#   [480] [512] [544]   copy of the second input's three fields
#   [288] [320] [352]   computed result fields (output offsets 0, 32, 64)
#   [0] .. [256]        temporaries
#
# The helpers called here (__ecp_nistz256_sqr_montq, __ecp_nistz256_mul_montq,
# __ecp_nistz256_sub_fromq, __ecp_nistz256_subq) are defined outside this
# chunk.  The per-call notes below are inferred from how each call site sets
# up its registers and should be confirmed against the helper bodies:
#   sqr_montq   [rdi] = square of [rsi]; limbs preloaded in rax,r14,r15,r8
#   mul_montq   [rdi] = [rsi] * [rbx]; limbs of [rsi] preloaded in r9..r12,
#               first limb of [rbx] preloaded in rax
#   sub_fromq   [rdi] = (value left in r12,r13,r8,r9) - [rbx]
#   subq        combines rax,rbp,rcx,r10 with r12,r13,r8,r9 and leaves the
#               result in r12,r13,r8,r9 (operand order not visible here)
# xmm0..xmm5 are read again after helper calls, so this code depends on the
# helpers leaving those registers untouched.
# =====================================================================
1314 .globl  ecp_nistz256_point_add
1315 .type   ecp_nistz256_point_add,@function
1316 .align  32
1317 ecp_nistz256_point_add:
# Save the callee-saved registers used below and reserve the frame.  With an
# ABI-conformant entry alignment, six pushes plus 576+8 bytes leave %rsp
# 16-byte aligned, which the movdqa stores into the frame require.
1318         pushq   %rbp
1319         pushq   %rbx
1320         pushq   %r12
1321         pushq   %r13
1322         pushq   %r14
1323         pushq   %r15
1324         subq    $576+8,%rsp
1325
# Load the first input, keep its pointer in %rbx, repoint %rsi at the second
# input, and copy the first input to [384]/[416]/[448].  The por chain leaves
# the OR of bytes 0..63 of the first input in xmm3; the third field
# (xmm4/xmm5) is stored but not ORed in.
1326         movdqu  0(%rsi),%xmm0
1327         movdqu  16(%rsi),%xmm1
1328         movdqu  32(%rsi),%xmm2
1329         movdqu  48(%rsi),%xmm3
1330         movdqu  64(%rsi),%xmm4
1331         movdqu  80(%rsi),%xmm5
1332         movq    %rsi,%rbx
1333         movq    %rdx,%rsi
1334         movdqa  %xmm0,384(%rsp)
1335         movdqa  %xmm1,384+16(%rsp)
1336         por     %xmm0,%xmm1
1337         movdqa  %xmm2,416(%rsp)
1338         movdqa  %xmm3,416+16(%rsp)
1339         por     %xmm2,%xmm3
1340         movdqa  %xmm4,448(%rsp)
1341         movdqa  %xmm5,448+16(%rsp)
1342         por     %xmm1,%xmm3
1343
# Load the second input (fields 0 and 32 via xmm0..xmm3, field 64 via
# rax,r14,r15,r8) and copy it to [480]/[512].  Interleaved with that, the
# pshufd/por pairs fold the first input's OR across all four dwords of xmm5,
# and xmm3 is rebuilt as the OR of bytes 0..63 of the second input.
1344         movdqu  0(%rsi),%xmm0
1345         pshufd  $0xb1,%xmm3,%xmm5
1346         movdqu  16(%rsi),%xmm1
1347         movdqu  32(%rsi),%xmm2
1348         por     %xmm3,%xmm5
1349         movdqu  48(%rsi),%xmm3
1350         movq    64+0(%rsi),%rax
1351         movq    64+8(%rsi),%r14
1352         movq    64+16(%rsi),%r15
1353         movq    64+24(%rsi),%r8
1354         movdqa  %xmm0,480(%rsp)
1355         pshufd  $0x1e,%xmm5,%xmm4
1356         movdqa  %xmm1,480+16(%rsp)
1357         por     %xmm0,%xmm1
# The next .byte line encodes "movq %rdi,%xmm0": park the output pointer in
# xmm0 so that %rdi can be reused as the helpers' destination pointer.
1358 .byte   102,72,15,110,199
1359         movdqa  %xmm2,512(%rsp)
1360         movdqa  %xmm3,512+16(%rsp)
1361         por     %xmm2,%xmm3
1362         por     %xmm4,%xmm5
1363         pxor    %xmm4,%xmm4
1364         por     %xmm1,%xmm3
1365
# Copy the second input's third field to [544], then [96] = square of it.
1366         leaq    64-0(%rsi),%rsi
1367         movq    %rax,544+0(%rsp)
1368         movq    %r14,544+8(%rsp)
1369         movq    %r15,544+16(%rsp)
1370         movq    %r8,544+24(%rsp)
1371         leaq    96(%rsp),%rdi
1372         call    __ecp_nistz256_sqr_montq
1373
# Turn the ORs into broadcast masks (compare with zero, then pshufd $0):
#   xmm5 = all-ones if bytes 0..63 of the first input are all zero, else 0
#   xmm4 = all-ones if bytes 0..63 of the second input are all zero, else 0
# Then preload the first input's third field for the next squaring.
1374         pcmpeqd %xmm4,%xmm5
1375         pshufd  $0xb1,%xmm3,%xmm4
1376         por     %xmm3,%xmm4
1377         pshufd  $0,%xmm5,%xmm5
1378         pshufd  $0x1e,%xmm4,%xmm3
1379         por     %xmm3,%xmm4
1380         pxor    %xmm3,%xmm3
1381         pcmpeqd %xmm3,%xmm4
1382         pshufd  $0,%xmm4,%xmm4
1383         movq    64+0(%rbx),%rax
1384         movq    64+8(%rbx),%r14
1385         movq    64+16(%rbx),%r15
1386         movq    64+24(%rbx),%r8
# The next .byte line encodes "movq %rbx,%xmm1": park the first-input pointer
# in xmm1 (it is needed again on the .Ladd_doubleq path).
1387 .byte   102,72,15,110,203
1388
# [32] = square of the first input's third field.
1389         leaq    64-0(%rbx),%rsi
1390         leaq    32(%rsp),%rdi
1391         call    __ecp_nistz256_sqr_montq
1392
# [224] = [96] * [544]
1393         movq    544(%rsp),%rax
1394         leaq    544(%rsp),%rbx
1395         movq    0+96(%rsp),%r9
1396         movq    8+96(%rsp),%r10
1397         leaq    0+96(%rsp),%rsi
1398         movq    16+96(%rsp),%r11
1399         movq    24+96(%rsp),%r12
1400         leaq    224(%rsp),%rdi
1401         call    __ecp_nistz256_mul_montq
1402
# [256] = [32] * [448]
1403         movq    448(%rsp),%rax
1404         leaq    448(%rsp),%rbx
1405         movq    0+32(%rsp),%r9
1406         movq    8+32(%rsp),%r10
1407         leaq    0+32(%rsp),%rsi
1408         movq    16+32(%rsp),%r11
1409         movq    24+32(%rsp),%r12
1410         leaq    256(%rsp),%rdi
1411         call    __ecp_nistz256_mul_montq
1412
# [224] = [224] * [416]
1413         movq    416(%rsp),%rax
1414         leaq    416(%rsp),%rbx
1415         movq    0+224(%rsp),%r9
1416         movq    8+224(%rsp),%r10
1417         leaq    0+224(%rsp),%rsi
1418         movq    16+224(%rsp),%r11
1419         movq    24+224(%rsp),%r12
1420         leaq    224(%rsp),%rdi
1421         call    __ecp_nistz256_mul_montq
1422
# [256] = [256] * [512]
1423         movq    512(%rsp),%rax
1424         leaq    512(%rsp),%rbx
1425         movq    0+256(%rsp),%r9
1426         movq    8+256(%rsp),%r10
1427         leaq    0+256(%rsp),%rsi
1428         movq    16+256(%rsp),%r11
1429         movq    24+256(%rsp),%r12
1430         leaq    256(%rsp),%rdi
1431         call    __ecp_nistz256_mul_montq
1432
# [64] = (product just computed, presumably still in registers) - [224]
1433         leaq    224(%rsp),%rbx
1434         leaq    64(%rsp),%rdi
1435         call    __ecp_nistz256_sub_fromq
1436
# Collapse r12,r13,r8,r9 (presumably the limbs of the difference just stored
# in [64]) into one "is it zero" word in %r12, and build xmm2 = xmm4 | xmm5
# (non-zero if either input was flagged by its mask).
1437         orq     %r13,%r12
1438         movdqa  %xmm4,%xmm2
1439         orq     %r8,%r12
1440         orq     %r9,%r12
1441         por     %xmm5,%xmm2
# The next .byte line encodes "movq %r12,%xmm3": keep the collapsed word for
# the branch logic below.
1442 .byte   102,73,15,110,220
1443
# [160] = [96] * [384]
1444         movq    384(%rsp),%rax
1445         leaq    384(%rsp),%rbx
1446         movq    0+96(%rsp),%r9
1447         movq    8+96(%rsp),%r10
1448         leaq    0+96(%rsp),%rsi
1449         movq    16+96(%rsp),%r11
1450         movq    24+96(%rsp),%r12
1451         leaq    160(%rsp),%rdi
1452         call    __ecp_nistz256_mul_montq
1453
# [192] = [32] * [480]
1454         movq    480(%rsp),%rax
1455         leaq    480(%rsp),%rbx
1456         movq    0+32(%rsp),%r9
1457         movq    8+32(%rsp),%r10
1458         leaq    0+32(%rsp),%rsi
1459         movq    16+32(%rsp),%r11
1460         movq    24+32(%rsp),%r12
1461         leaq    192(%rsp),%rdi
1462         call    __ecp_nistz256_mul_montq
1463
# [0] = (product just computed) - [160]
1464         leaq    160(%rsp),%rbx
1465         leaq    0(%rsp),%rdi
1466         call    __ecp_nistz256_sub_fromq
1467
# Collapse this second difference the same way; the flags set by the last
# orq drive the jnz below.
1468         orq     %r13,%r12
1469         orq     %r8,%r12
1470         orq     %r9,%r12
1471
# Dispatch:
#   second difference non-zero            -> .Ladd_proceedq (general case)
#   otherwise, either input mask set      -> .Ladd_proceedq (the final
#                                            blend picks the other input)
#   otherwise, first difference also zero -> .Ladd_doubleq
#   otherwise                             -> write 96 zero bytes and return
# The 0x3e byte is a prefix on the jnz that follows; it is commonly used as
# a branch hint, which is presumably the intent here.
1472 .byte   0x3e
1473         jnz     .Ladd_proceedq
# The next two .byte lines encode "movq %xmm2,%r8" and "movq %xmm3,%r9":
# r8 = combined input masks, r9 = collapsed first difference.
1474 .byte   102,73,15,126,208
1475 .byte   102,73,15,126,217
1476         testq   %r8,%r8
1477         jnz     .Ladd_proceedq
1478         testq   %r9,%r9
1479         jz      .Ladd_doubleq
1480
# The next .byte line encodes "movq %xmm0,%rdi" (restore the output pointer);
# then all 96 output bytes are cleared.
1481 .byte   102,72,15,126,199
1482         pxor    %xmm0,%xmm0
1483         movdqu  %xmm0,0(%rdi)
1484         movdqu  %xmm0,16(%rdi)
1485         movdqu  %xmm0,32(%rdi)
1486         movdqu  %xmm0,48(%rdi)
1487         movdqu  %xmm0,64(%rdi)
1488         movdqu  %xmm0,80(%rdi)
1489         jmp     .Ladd_doneq
1490
1491 .align  32
1492 .Ladd_doubleq:
# The two .byte lines encode "movq %xmm1,%rsi" (first-input pointer) and
# "movq %xmm0,%rdi" (output pointer).  Releasing 416 bytes leaves a frame of
# 160+8 bytes, the amount ecp_nistz256_point_double's epilogue releases, and
# control continues at .Lpoint_double_shortcutq (defined outside this chunk,
# presumably just after that function's prologue -- confirm).  This function
# does not return through .Ladd_doneq on this path.
1493 .byte   102,72,15,126,206
1494 .byte   102,72,15,126,199
1495         addq    $416,%rsp
1496         jmp     .Lpoint_double_shortcutq
1497
1498 .align  32
1499 .Ladd_proceedq:
# [96] = square of [64]
1500         movq    0+64(%rsp),%rax
1501         movq    8+64(%rsp),%r14
1502         leaq    0+64(%rsp),%rsi
1503         movq    16+64(%rsp),%r15
1504         movq    24+64(%rsp),%r8
1505         leaq    96(%rsp),%rdi
1506         call    __ecp_nistz256_sqr_montq
1507
# [352] = [0] * [448]
1508         movq    448(%rsp),%rax
1509         leaq    448(%rsp),%rbx
1510         movq    0+0(%rsp),%r9
1511         movq    8+0(%rsp),%r10
1512         leaq    0+0(%rsp),%rsi
1513         movq    16+0(%rsp),%r11
1514         movq    24+0(%rsp),%r12
1515         leaq    352(%rsp),%rdi
1516         call    __ecp_nistz256_mul_montq
1517
# [32] = square of [0]
1518         movq    0+0(%rsp),%rax
1519         movq    8+0(%rsp),%r14
1520         leaq    0+0(%rsp),%rsi
1521         movq    16+0(%rsp),%r15
1522         movq    24+0(%rsp),%r8
1523         leaq    32(%rsp),%rdi
1524         call    __ecp_nistz256_sqr_montq
1525
# [352] = [352] * [544]
1526         movq    544(%rsp),%rax
1527         leaq    544(%rsp),%rbx
1528         movq    0+352(%rsp),%r9
1529         movq    8+352(%rsp),%r10
1530         leaq    0+352(%rsp),%rsi
1531         movq    16+352(%rsp),%r11
1532         movq    24+352(%rsp),%r12
1533         leaq    352(%rsp),%rdi
1534         call    __ecp_nistz256_mul_montq
1535
# [128] = [32] * [0]
1536         movq    0(%rsp),%rax
1537         leaq    0(%rsp),%rbx
1538         movq    0+32(%rsp),%r9
1539         movq    8+32(%rsp),%r10
1540         leaq    0+32(%rsp),%rsi
1541         movq    16+32(%rsp),%r11
1542         movq    24+32(%rsp),%r12
1543         leaq    128(%rsp),%rdi
1544         call    __ecp_nistz256_mul_montq
1545
# [192] = [32] * [160]
1546         movq    160(%rsp),%rax
1547         leaq    160(%rsp),%rbx
1548         movq    0+32(%rsp),%r9
1549         movq    8+32(%rsp),%r10
1550         leaq    0+32(%rsp),%rsi
1551         movq    16+32(%rsp),%r11
1552         movq    24+32(%rsp),%r12
1553         leaq    192(%rsp),%rdi
1554         call    __ecp_nistz256_mul_montq
1555
1556
1557
1558
# Inline doubling of the value in r12,r13,r8,r9 (presumably the product just
# stored in [192]).  The add/adc chain doubles it and sbbq %r11,%r11 turns
# the carry-out into a mask; the leaq/movq lines are interleaved because they
# do not modify flags.  The sub/sbb chain then subtracts the four limbs
# (-1, %r14, 0, %r15): limbs 0 and 2 match .Lpoly, and %r14/%r15 presumably
# still hold .Lpoly+8 and .Lpoly+24 from the helper -- confirm.  The cmovz
# chain restores the un-subtracted copy when %r11 is zero, so the
# subtraction only takes effect when the doubling carried out.
# NOTE(review): a doubled value that is >= the modulus but did not carry out
# is therefore left as is; confirm this partial reduction is intended.
# Interleaved with the cmovz chain, [96] is loaded into rax,rbp,rcx,r10 for
# the subq call.
1559         addq    %r12,%r12
1560         leaq    96(%rsp),%rsi
1561         adcq    %r13,%r13
1562         movq    %r12,%rax
1563         adcq    %r8,%r8
1564         adcq    %r9,%r9
1565         movq    %r13,%rbp
1566         sbbq    %r11,%r11
1567
1568         subq    $-1,%r12
1569         movq    %r8,%rcx
1570         sbbq    %r14,%r13
1571         sbbq    $0,%r8
1572         movq    %r9,%r10
1573         sbbq    %r15,%r9
1574         testq   %r11,%r11
1575
1576         cmovzq  %rax,%r12
1577         movq    0(%rsi),%rax
1578         cmovzq  %rbp,%r13
1579         movq    8(%rsi),%rbp
1580         cmovzq  %rcx,%r8
1581         movq    16(%rsi),%rcx
1582         cmovzq  %r10,%r9
1583         movq    24(%rsi),%r10
1584
# Presumably r12.. = [96] - (doubled value); the result stays in registers.
1585         call    __ecp_nistz256_subq
1586
# [288] = (value in registers) - [128]
1587         leaq    128(%rsp),%rbx
1588         leaq    288(%rsp),%rdi
1589         call    __ecp_nistz256_sub_fromq
1590
# Load [192] into rax,rbp,rcx,r10 and combine it with the value still in
# r12,r13,r8,r9 (presumably [192] - [288]); the result is stored to [320] by
# the four movq lines that follow the call.
1591         movq    192+0(%rsp),%rax
1592         movq    192+8(%rsp),%rbp
1593         movq    192+16(%rsp),%rcx
1594         movq    192+24(%rsp),%r10
1595         leaq    320(%rsp),%rdi
1596
1597         call    __ecp_nistz256_subq
1598
1599         movq    %r12,0(%rdi)
1600         movq    %r13,8(%rdi)
1601         movq    %r8,16(%rdi)
1602         movq    %r9,24(%rdi)
# [256] = [224] * [128]
1603         movq    128(%rsp),%rax
1604         leaq    128(%rsp),%rbx
1605         movq    0+224(%rsp),%r9
1606         movq    8+224(%rsp),%r10
1607         leaq    0+224(%rsp),%rsi
1608         movq    16+224(%rsp),%r11
1609         movq    24+224(%rsp),%r12
1610         leaq    256(%rsp),%rdi
1611         call    __ecp_nistz256_mul_montq
1612
# [320] = [64] * [320]
1613         movq    320(%rsp),%rax
1614         leaq    320(%rsp),%rbx
1615         movq    0+64(%rsp),%r9
1616         movq    8+64(%rsp),%r10
1617         leaq    0+64(%rsp),%rsi
1618         movq    16+64(%rsp),%r11
1619         movq    24+64(%rsp),%r12
1620         leaq    320(%rsp),%rdi
1621         call    __ecp_nistz256_mul_montq
1622
# [320] = (product just computed) - [256]
1623         leaq    256(%rsp),%rbx
1624         leaq    320(%rsp),%rdi
1625         call    __ecp_nistz256_sub_fromq
1626
# The next .byte line encodes "movq %xmm0,%rdi": restore the output pointer.
1627 .byte   102,72,15,126,199
1628
# Branch-free selection of each output field with the two masks:
#   start with the computed field; where xmm5 is set take the second input's
#   field instead; where xmm4 is set take the first input's field instead.
# Output offset 64: computed [352], second input [544], first input [448].
1629         movdqa  %xmm5,%xmm0
1630         movdqa  %xmm5,%xmm1
1631         pandn   352(%rsp),%xmm0
1632         movdqa  %xmm5,%xmm2
1633         pandn   352+16(%rsp),%xmm1
1634         movdqa  %xmm5,%xmm3
1635         pand    544(%rsp),%xmm2
1636         pand    544+16(%rsp),%xmm3
1637         por     %xmm0,%xmm2
1638         por     %xmm1,%xmm3
1639
1640         movdqa  %xmm4,%xmm0
1641         movdqa  %xmm4,%xmm1
1642         pandn   %xmm2,%xmm0
1643         movdqa  %xmm4,%xmm2
1644         pandn   %xmm3,%xmm1
1645         movdqa  %xmm4,%xmm3
1646         pand    448(%rsp),%xmm2
1647         pand    448+16(%rsp),%xmm3
1648         por     %xmm0,%xmm2
1649         por     %xmm1,%xmm3
1650         movdqu  %xmm2,64(%rdi)
1651         movdqu  %xmm3,80(%rdi)
1652
# Output offset 0: computed [288], second input [480], first input [384].
1653         movdqa  %xmm5,%xmm0
1654         movdqa  %xmm5,%xmm1
1655         pandn   288(%rsp),%xmm0
1656         movdqa  %xmm5,%xmm2
1657         pandn   288+16(%rsp),%xmm1
1658         movdqa  %xmm5,%xmm3
1659         pand    480(%rsp),%xmm2
1660         pand    480+16(%rsp),%xmm3
1661         por     %xmm0,%xmm2
1662         por     %xmm1,%xmm3
1663
1664         movdqa  %xmm4,%xmm0
1665         movdqa  %xmm4,%xmm1
1666         pandn   %xmm2,%xmm0
1667         movdqa  %xmm4,%xmm2
1668         pandn   %xmm3,%xmm1
1669         movdqa  %xmm4,%xmm3
1670         pand    384(%rsp),%xmm2
1671         pand    384+16(%rsp),%xmm3
1672         por     %xmm0,%xmm2
1673         por     %xmm1,%xmm3
1674         movdqu  %xmm2,0(%rdi)
1675         movdqu  %xmm3,16(%rdi)
1676
# Output offset 32: computed [320], second input [512], first input [416].
1677         movdqa  %xmm5,%xmm0
1678         movdqa  %xmm5,%xmm1
1679         pandn   320(%rsp),%xmm0
1680         movdqa  %xmm5,%xmm2
1681         pandn   320+16(%rsp),%xmm1
1682         movdqa  %xmm5,%xmm3
1683         pand    512(%rsp),%xmm2
1684         pand    512+16(%rsp),%xmm3
1685         por     %xmm0,%xmm2
1686         por     %xmm1,%xmm3
1687
1688         movdqa  %xmm4,%xmm0
1689         movdqa  %xmm4,%xmm1
1690         pandn   %xmm2,%xmm0
1691         movdqa  %xmm4,%xmm2
1692         pandn   %xmm3,%xmm1
1693         movdqa  %xmm4,%xmm3
1694         pand    416(%rsp),%xmm2
1695         pand    416+16(%rsp),%xmm3
1696         por     %xmm0,%xmm2
1697         por     %xmm1,%xmm3
1698         movdqu  %xmm2,32(%rdi)
1699         movdqu  %xmm3,48(%rdi)
1700
1701 .Ladd_doneq:
# Release the frame and restore the callee-saved registers in reverse push
# order.  The final .byte pair 0xf3,0xc3 encodes "rep ret".
1702         addq    $576+8,%rsp
1703         popq    %r15
1704         popq    %r14
1705         popq    %r13
1706         popq    %r12
1707         popq    %rbx
1708         popq    %rbp
1709         .byte   0xf3,0xc3
1710 .size   ecp_nistz256_point_add,.-ecp_nistz256_point_add
# =====================================================================
# ecp_nistz256_point_add_affine
#
# Register use on entry, as read by the code below:
#   %rdi  output buffer; 96 bytes are written
#   %rsi  first input;  96 bytes are read (three 32-byte fields)
#   %rdx  second input; 64 bytes are read (two 32-byte fields)
# The fields are presumably X, Y, Z of a point for the first input and X, Y
# of an affine point for the second -- confirm against the C prototype.
#
# Notation: [N] is the 32-byte stack slot at N(%rsp).
#   [320] [352] [384]   copy of the first input's three fields
#   [416] [448]         copy of the second input's two fields
#   [224] [256] [288]   computed result fields (output offsets 0, 32, 64)
#   [0] .. [192]        temporaries
#
# The helpers called here (__ecp_nistz256_sqr_montq, __ecp_nistz256_mul_montq,
# __ecp_nistz256_sub_fromq, __ecp_nistz256_subq) are defined outside this
# chunk.  The per-call notes below are inferred from how each call site sets
# up its registers and should be confirmed against the helper bodies:
#   sqr_montq   [rdi] = square of [rsi]; limbs preloaded in rax,r14,r15,r8
#   mul_montq   [rdi] = [rsi] * [rbx]; limbs of [rsi] preloaded in r9..r12,
#               first limb of [rbx] preloaded in rax
#   sub_fromq   [rdi] = (value left in r12,r13,r8,r9) - [rbx]
#   subq        combines rax,rbp,rcx,r10 with r12,r13,r8,r9 and leaves the
#               result in r12,r13,r8,r9 (operand order not visible here)
# xmm0, xmm4 and xmm5 are read again after helper calls, so this code
# depends on the helpers leaving those registers untouched.
# =====================================================================
1711 .globl  ecp_nistz256_point_add_affine
1712 .type   ecp_nistz256_point_add_affine,@function
1713 .align  32
1714 ecp_nistz256_point_add_affine:
# Save the callee-saved registers used below and reserve the frame.  With an
# ABI-conformant entry alignment, six pushes plus 480+8 bytes leave %rsp
# 16-byte aligned, which the movdqa stores into the frame require.
1715         pushq   %rbp
1716         pushq   %rbx
1717         pushq   %r12
1718         pushq   %r13
1719         pushq   %r14
1720         pushq   %r15
1721         subq    $480+8,%rsp
1722
# Load the first input (its third field also into rax,r14,r15,r8 for the
# squaring below), keep the second-input pointer in %rbx, and copy the first
# input to [320]/[352]/[384].  The por chain leaves the OR of bytes 0..63 of
# the first input in xmm3; the third field is stored but not ORed in.
1723         movdqu  0(%rsi),%xmm0
1724         movq    %rdx,%rbx
1725         movdqu  16(%rsi),%xmm1
1726         movdqu  32(%rsi),%xmm2
1727         movdqu  48(%rsi),%xmm3
1728         movdqu  64(%rsi),%xmm4
1729         movdqu  80(%rsi),%xmm5
1730         movq    64+0(%rsi),%rax
1731         movq    64+8(%rsi),%r14
1732         movq    64+16(%rsi),%r15
1733         movq    64+24(%rsi),%r8
1734         movdqa  %xmm0,320(%rsp)
1735         movdqa  %xmm1,320+16(%rsp)
1736         por     %xmm0,%xmm1
1737         movdqa  %xmm2,352(%rsp)
1738         movdqa  %xmm3,352+16(%rsp)
1739         por     %xmm2,%xmm3
1740         movdqa  %xmm4,384(%rsp)
1741         movdqa  %xmm5,384+16(%rsp)
1742         por     %xmm1,%xmm3
1743
# Load the 64-byte second input and copy it to [416]/[448].  Interleaved
# with that, the pshufd/por pairs fold the first input's OR across all four
# dwords of xmm5, and xmm3 is rebuilt as the OR of the second input's bytes.
1744         movdqu  0(%rbx),%xmm0
1745         pshufd  $0xb1,%xmm3,%xmm5
1746         movdqu  16(%rbx),%xmm1
1747         movdqu  32(%rbx),%xmm2
1748         por     %xmm3,%xmm5
1749         movdqu  48(%rbx),%xmm3
1750         movdqa  %xmm0,416(%rsp)
1751         pshufd  $0x1e,%xmm5,%xmm4
1752         movdqa  %xmm1,416+16(%rsp)
1753         por     %xmm0,%xmm1
# The next .byte line encodes "movq %rdi,%xmm0": park the output pointer in
# xmm0 so that %rdi can be reused as the helpers' destination pointer.
1754 .byte   102,72,15,110,199
1755         movdqa  %xmm2,448(%rsp)
1756         movdqa  %xmm3,448+16(%rsp)
1757         por     %xmm2,%xmm3
1758         por     %xmm4,%xmm5
1759         pxor    %xmm4,%xmm4
1760         por     %xmm1,%xmm3
1761
# [32] = square of the first input's third field.
1762         leaq    64-0(%rsi),%rsi
1763         leaq    32(%rsp),%rdi
1764         call    __ecp_nistz256_sqr_montq
1765
# Two things are interleaved here:
#  - the ORs become broadcast masks (compare with zero, then pshufd $0):
#      xmm5 = all-ones if bytes 0..63 of the first input are all zero, else 0
#      xmm4 = all-ones if all 64 bytes of the second input are zero, else 0
#  - operands for the next multiply are set up: %rax = first limb of the
#    second input's first field (%rbx still points at the second input), and
#    r12..r15 are moved into r9..r12 -- presumably the square just computed,
#    left in those registers by the helper.
1766         pcmpeqd %xmm4,%xmm5
1767         pshufd  $0xb1,%xmm3,%xmm4
1768         movq    0(%rbx),%rax
1769
1770         movq    %r12,%r9
1771         por     %xmm3,%xmm4
1772         pshufd  $0,%xmm5,%xmm5
1773         pshufd  $0x1e,%xmm4,%xmm3
1774         movq    %r13,%r10
1775         por     %xmm3,%xmm4
1776         pxor    %xmm3,%xmm3
1777         movq    %r14,%r11
1778         pcmpeqd %xmm3,%xmm4
1779         pshufd  $0,%xmm4,%xmm4
1780
# [0] = [32] * (second input's first field)
1781         leaq    32-0(%rsp),%rsi
1782         movq    %r15,%r12
1783         leaq    0(%rsp),%rdi
1784         call    __ecp_nistz256_mul_montq
1785
# [64] = (product just computed, presumably still in registers) - [320]
1786         leaq    320(%rsp),%rbx
1787         leaq    64(%rsp),%rdi
1788         call    __ecp_nistz256_sub_fromq
1789
# [32] = [32] * [384]
1790         movq    384(%rsp),%rax
1791         leaq    384(%rsp),%rbx
1792         movq    0+32(%rsp),%r9
1793         movq    8+32(%rsp),%r10
1794         leaq    0+32(%rsp),%rsi
1795         movq    16+32(%rsp),%r11
1796         movq    24+32(%rsp),%r12
1797         leaq    32(%rsp),%rdi
1798         call    __ecp_nistz256_mul_montq
1799
# [288] = [64] * [384]
1800         movq    384(%rsp),%rax
1801         leaq    384(%rsp),%rbx
1802         movq    0+64(%rsp),%r9
1803         movq    8+64(%rsp),%r10
1804         leaq    0+64(%rsp),%rsi
1805         movq    16+64(%rsp),%r11
1806         movq    24+64(%rsp),%r12
1807         leaq    288(%rsp),%rdi
1808         call    __ecp_nistz256_mul_montq
1809
# [32] = [32] * [448]
1810         movq    448(%rsp),%rax
1811         leaq    448(%rsp),%rbx
1812         movq    0+32(%rsp),%r9
1813         movq    8+32(%rsp),%r10
1814         leaq    0+32(%rsp),%rsi
1815         movq    16+32(%rsp),%r11
1816         movq    24+32(%rsp),%r12
1817         leaq    32(%rsp),%rdi
1818         call    __ecp_nistz256_mul_montq
1819
# [96] = (product just computed) - [352]
1820         leaq    352(%rsp),%rbx
1821         leaq    96(%rsp),%rdi
1822         call    __ecp_nistz256_sub_fromq
1823
# [128] = square of [64]
1824         movq    0+64(%rsp),%rax
1825         movq    8+64(%rsp),%r14
1826         leaq    0+64(%rsp),%rsi
1827         movq    16+64(%rsp),%r15
1828         movq    24+64(%rsp),%r8
1829         leaq    128(%rsp),%rdi
1830         call    __ecp_nistz256_sqr_montq
1831
# [192] = square of [96]
1832         movq    0+96(%rsp),%rax
1833         movq    8+96(%rsp),%r14
1834         leaq    0+96(%rsp),%rsi
1835         movq    16+96(%rsp),%r15
1836         movq    24+96(%rsp),%r8
1837         leaq    192(%rsp),%rdi
1838         call    __ecp_nistz256_sqr_montq
1839
# [160] = [64] * [128]
1840         movq    128(%rsp),%rax
1841         leaq    128(%rsp),%rbx
1842         movq    0+64(%rsp),%r9
1843         movq    8+64(%rsp),%r10
1844         leaq    0+64(%rsp),%rsi
1845         movq    16+64(%rsp),%r11
1846         movq    24+64(%rsp),%r12
1847         leaq    160(%rsp),%rdi
1848         call    __ecp_nistz256_mul_montq
1849
# [0] = [128] * [320]
1850         movq    320(%rsp),%rax
1851         leaq    320(%rsp),%rbx
1852         movq    0+128(%rsp),%r9
1853         movq    8+128(%rsp),%r10
1854         leaq    0+128(%rsp),%rsi
1855         movq    16+128(%rsp),%r11
1856         movq    24+128(%rsp),%r12
1857         leaq    0(%rsp),%rdi
1858         call    __ecp_nistz256_mul_montq
1859
1860
1861
1862
# Inline doubling of the value in r12,r13,r8,r9 (presumably the product just
# stored in [0]).  The add/adc chain doubles it and sbbq %r11,%r11 turns the
# carry-out into a mask; the leaq/movq lines are interleaved because they do
# not modify flags.  The sub/sbb chain then subtracts the four limbs
# (-1, %r14, 0, %r15): limbs 0 and 2 match .Lpoly, and %r14/%r15 presumably
# still hold .Lpoly+8 and .Lpoly+24 from the helper -- confirm.  The cmovz
# chain restores the un-subtracted copy when %r11 is zero, so the
# subtraction only takes effect when the doubling carried out.
# NOTE(review): a doubled value that is >= the modulus but did not carry out
# is therefore left as is; confirm this partial reduction is intended.
# Interleaved with the cmovz chain, [192] is loaded into rax,rbp,rcx,r10 for
# the subq call.
1863         addq    %r12,%r12
1864         leaq    192(%rsp),%rsi
1865         adcq    %r13,%r13
1866         movq    %r12,%rax
1867         adcq    %r8,%r8
1868         adcq    %r9,%r9
1869         movq    %r13,%rbp
1870         sbbq    %r11,%r11
1871
1872         subq    $-1,%r12
1873         movq    %r8,%rcx
1874         sbbq    %r14,%r13
1875         sbbq    $0,%r8
1876         movq    %r9,%r10
1877         sbbq    %r15,%r9
1878         testq   %r11,%r11
1879
1880         cmovzq  %rax,%r12
1881         movq    0(%rsi),%rax
1882         cmovzq  %rbp,%r13
1883         movq    8(%rsi),%rbp
1884         cmovzq  %rcx,%r8
1885         movq    16(%rsi),%rcx
1886         cmovzq  %r10,%r9
1887         movq    24(%rsi),%r10
1888
# Presumably r12.. = [192] - (doubled value); the result stays in registers.
1889         call    __ecp_nistz256_subq
1890
# [224] = (value in registers) - [160]
1891         leaq    160(%rsp),%rbx
1892         leaq    224(%rsp),%rdi
1893         call    __ecp_nistz256_sub_fromq
1894
# Load [0] into rax,rbp,rcx,r10 and combine it with the value still in
# r12,r13,r8,r9 (presumably [0] - [224]); the result is stored to [64] by
# the four movq lines that follow the call.
1895         movq    0+0(%rsp),%rax
1896         movq    0+8(%rsp),%rbp
1897         movq    0+16(%rsp),%rcx
1898         movq    0+24(%rsp),%r10
1899         leaq    64(%rsp),%rdi
1900
1901         call    __ecp_nistz256_subq
1902
1903         movq    %r12,0(%rdi)
1904         movq    %r13,8(%rdi)
1905         movq    %r8,16(%rdi)
1906         movq    %r9,24(%rdi)
# [32] = [160] * [352]
1907         movq    352(%rsp),%rax
1908         leaq    352(%rsp),%rbx
1909         movq    0+160(%rsp),%r9
1910         movq    8+160(%rsp),%r10
1911         leaq    0+160(%rsp),%rsi
1912         movq    16+160(%rsp),%r11
1913         movq    24+160(%rsp),%r12
1914         leaq    32(%rsp),%rdi
1915         call    __ecp_nistz256_mul_montq
1916
# [64] = [64] * [96]
1917         movq    96(%rsp),%rax
1918         leaq    96(%rsp),%rbx
1919         movq    0+64(%rsp),%r9
1920         movq    8+64(%rsp),%r10
1921         leaq    0+64(%rsp),%rsi
1922         movq    16+64(%rsp),%r11
1923         movq    24+64(%rsp),%r12
1924         leaq    64(%rsp),%rdi
1925         call    __ecp_nistz256_mul_montq
1926
# [256] = (product just computed) - [32]
1927         leaq    32(%rsp),%rbx
1928         leaq    256(%rsp),%rdi
1929         call    __ecp_nistz256_sub_fromq
1930
# The next .byte line encodes "movq %xmm0,%rdi": restore the output pointer.
1931 .byte   102,72,15,126,199
1932
# Branch-free selection of each output field with the two masks:
#   start with the computed field; where xmm5 is set take the replacement
#   tied to the second input; where xmm4 is set take the first input's field.
# Output offset 64: computed [288], the constant .LONE_mont, first input [384].
1933         movdqa  %xmm5,%xmm0
1934         movdqa  %xmm5,%xmm1
1935         pandn   288(%rsp),%xmm0
1936         movdqa  %xmm5,%xmm2
1937         pandn   288+16(%rsp),%xmm1
1938         movdqa  %xmm5,%xmm3
1939         pand    .LONE_mont(%rip),%xmm2
1940         pand    .LONE_mont+16(%rip),%xmm3
1941         por     %xmm0,%xmm2
1942         por     %xmm1,%xmm3
1943
1944         movdqa  %xmm4,%xmm0
1945         movdqa  %xmm4,%xmm1
1946         pandn   %xmm2,%xmm0
1947         movdqa  %xmm4,%xmm2
1948         pandn   %xmm3,%xmm1
1949         movdqa  %xmm4,%xmm3
1950         pand    384(%rsp),%xmm2
1951         pand    384+16(%rsp),%xmm3
1952         por     %xmm0,%xmm2
1953         por     %xmm1,%xmm3
1954         movdqu  %xmm2,64(%rdi)
1955         movdqu  %xmm3,80(%rdi)
1956
# Output offset 0: computed [224], second input [416], first input [320].
1957         movdqa  %xmm5,%xmm0
1958         movdqa  %xmm5,%xmm1
1959         pandn   224(%rsp),%xmm0
1960         movdqa  %xmm5,%xmm2
1961         pandn   224+16(%rsp),%xmm1
1962         movdqa  %xmm5,%xmm3
1963         pand    416(%rsp),%xmm2
1964         pand    416+16(%rsp),%xmm3
1965         por     %xmm0,%xmm2
1966         por     %xmm1,%xmm3
1967
1968         movdqa  %xmm4,%xmm0
1969         movdqa  %xmm4,%xmm1
1970         pandn   %xmm2,%xmm0
1971         movdqa  %xmm4,%xmm2
1972         pandn   %xmm3,%xmm1
1973         movdqa  %xmm4,%xmm3
1974         pand    320(%rsp),%xmm2
1975         pand    320+16(%rsp),%xmm3
1976         por     %xmm0,%xmm2
1977         por     %xmm1,%xmm3
1978         movdqu  %xmm2,0(%rdi)
1979         movdqu  %xmm3,16(%rdi)
1980
# Output offset 32: computed [256], second input [448], first input [352].
1981         movdqa  %xmm5,%xmm0
1982         movdqa  %xmm5,%xmm1
1983         pandn   256(%rsp),%xmm0
1984         movdqa  %xmm5,%xmm2
1985         pandn   256+16(%rsp),%xmm1
1986         movdqa  %xmm5,%xmm3
1987         pand    448(%rsp),%xmm2
1988         pand    448+16(%rsp),%xmm3
1989         por     %xmm0,%xmm2
1990         por     %xmm1,%xmm3
1991
1992         movdqa  %xmm4,%xmm0
1993         movdqa  %xmm4,%xmm1
1994         pandn   %xmm2,%xmm0
1995         movdqa  %xmm4,%xmm2
1996         pandn   %xmm3,%xmm1
1997         movdqa  %xmm4,%xmm3
1998         pand    352(%rsp),%xmm2
1999         pand    352+16(%rsp),%xmm3
2000         por     %xmm0,%xmm2
2001         por     %xmm1,%xmm3
2002         movdqu  %xmm2,32(%rdi)
2003         movdqu  %xmm3,48(%rdi)
2004
# Release the frame and restore the callee-saved registers in reverse push
# order.  The final .byte pair 0xf3,0xc3 encodes "rep ret".
2005         addq    $480+8,%rsp
2006         popq    %r15
2007         popq    %r14
2008         popq    %r13
2009         popq    %r12
2010         popq    %rbx
2011         popq    %rbp
2012         .byte   0xf3,0xc3
2013 .size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
2014 .section .note.GNU-stack,"",%progbits