# secure/lib/libcrypto/asm/ecp_nistz256-x86_64.s
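#
# Low-level constant-time arithmetic for the NIST P-256 (secp256r1) curve.
# This is the pre-generated output of OpenSSL's ecp_nistz256-x86_64.pl
# perlasm script (the generator strips its comments); field elements are
# four 64-bit little-endian limbs, and mul/sqr results stay in the
# Montgomery domain.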
1 .text   
2
3
4
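# .Lpoly is the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1, stored
# least-significant limb first.  .LRR is 2^512 mod p (multiplying by it
# enters the Montgomery domain), .LONE_mont is 1 in Montgomery form
# (2^256 mod p), and .LOne/.LTwo/.LThree are 32-bit broadcast constants
# used by the table-lookup routines below.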
5 .align  64
6 .Lpoly:
7 .quad   0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
8
9
10 .LRR:
11 .quad   0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
12
13 .LOne:
14 .long   1,1,1,1,1,1,1,1
15 .LTwo:
16 .long   2,2,2,2,2,2,2,2
17 .LThree:
18 .long   3,3,3,3,3,3,3,3
19 .LONE_mont:
20 .quad   0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
21
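# ecp_nistz256_mul_by_2: res(%rdi) = a(%rsi)*2 mod p.
# Double with carry, then conditionally subtract p.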
22 .globl  ecp_nistz256_mul_by_2
23 .type   ecp_nistz256_mul_by_2,@function
24 .align  64
25 ecp_nistz256_mul_by_2:
26         pushq   %r12
27         pushq   %r13
28
29         movq    0(%rsi),%r8
30         movq    8(%rsi),%r9
31         addq    %r8,%r8
32         movq    16(%rsi),%r10
33         adcq    %r9,%r9
34         movq    24(%rsi),%r11
35         leaq    .Lpoly(%rip),%rsi
36         movq    %r8,%rax
37         adcq    %r10,%r10
38         adcq    %r11,%r11
39         movq    %r9,%rdx
40         sbbq    %r13,%r13
41
42         subq    0(%rsi),%r8
43         movq    %r10,%rcx
44         sbbq    8(%rsi),%r9
45         sbbq    16(%rsi),%r10
46         movq    %r11,%r12
47         sbbq    24(%rsi),%r11
48         testq   %r13,%r13
49
50         cmovzq  %rax,%r8
51         cmovzq  %rdx,%r9
52         movq    %r8,0(%rdi)
53         cmovzq  %rcx,%r10
54         movq    %r9,8(%rdi)
55         cmovzq  %r12,%r11
56         movq    %r10,16(%rdi)
57         movq    %r11,24(%rdi)
58
59         popq    %r13
60         popq    %r12
61         .byte   0xf3,0xc3
62 .size   ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
63
64
65
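# ecp_nistz256_div_by_2: res(%rdi) = a(%rsi)/2 mod p.
# Computes a+p and keeps it only when a is odd, so the following one-bit
# right shift across the limbs is exact.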
66 .globl  ecp_nistz256_div_by_2
67 .type   ecp_nistz256_div_by_2,@function
68 .align  32
69 ecp_nistz256_div_by_2:
70         pushq   %r12
71         pushq   %r13
72
73         movq    0(%rsi),%r8
74         movq    8(%rsi),%r9
75         movq    16(%rsi),%r10
76         movq    %r8,%rax
77         movq    24(%rsi),%r11
78         leaq    .Lpoly(%rip),%rsi
79
80         movq    %r9,%rdx
81         xorq    %r13,%r13
82         addq    0(%rsi),%r8
83         movq    %r10,%rcx
84         adcq    8(%rsi),%r9
85         adcq    16(%rsi),%r10
86         movq    %r11,%r12
87         adcq    24(%rsi),%r11
88         adcq    $0,%r13
89         xorq    %rsi,%rsi
90         testq   $1,%rax
91
92         cmovzq  %rax,%r8
93         cmovzq  %rdx,%r9
94         cmovzq  %rcx,%r10
95         cmovzq  %r12,%r11
96         cmovzq  %rsi,%r13
97
98         movq    %r9,%rax
99         shrq    $1,%r8
100         shlq    $63,%rax
101         movq    %r10,%rdx
102         shrq    $1,%r9
103         orq     %rax,%r8
104         shlq    $63,%rdx
105         movq    %r11,%rcx
106         shrq    $1,%r10
107         orq     %rdx,%r9
108         shlq    $63,%rcx
109         shrq    $1,%r11
110         shlq    $63,%r13
111         orq     %rcx,%r10
112         orq     %r13,%r11
113
114         movq    %r8,0(%rdi)
115         movq    %r9,8(%rdi)
116         movq    %r10,16(%rdi)
117         movq    %r11,24(%rdi)
118
119         popq    %r13
120         popq    %r12
121         .byte   0xf3,0xc3
122 .size   ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
123
124
125
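# ecp_nistz256_mul_by_3: res(%rdi) = a(%rsi)*3 mod p,
# evaluated as 2*a followed by +a, with a conditional subtraction of p
# after each step.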
126 .globl  ecp_nistz256_mul_by_3
127 .type   ecp_nistz256_mul_by_3,@function
128 .align  32
129 ecp_nistz256_mul_by_3:
130         pushq   %r12
131         pushq   %r13
132
133         movq    0(%rsi),%r8
134         xorq    %r13,%r13
135         movq    8(%rsi),%r9
136         addq    %r8,%r8
137         movq    16(%rsi),%r10
138         adcq    %r9,%r9
139         movq    24(%rsi),%r11
140         movq    %r8,%rax
141         adcq    %r10,%r10
142         adcq    %r11,%r11
143         movq    %r9,%rdx
144         adcq    $0,%r13
145
146         subq    $-1,%r8
147         movq    %r10,%rcx
148         sbbq    .Lpoly+8(%rip),%r9
149         sbbq    $0,%r10
150         movq    %r11,%r12
151         sbbq    .Lpoly+24(%rip),%r11
152         testq   %r13,%r13
153
154         cmovzq  %rax,%r8
155         cmovzq  %rdx,%r9
156         cmovzq  %rcx,%r10
157         cmovzq  %r12,%r11
158
159         xorq    %r13,%r13
160         addq    0(%rsi),%r8
161         adcq    8(%rsi),%r9
162         movq    %r8,%rax
163         adcq    16(%rsi),%r10
164         adcq    24(%rsi),%r11
165         movq    %r9,%rdx
166         adcq    $0,%r13
167
168         subq    $-1,%r8
169         movq    %r10,%rcx
170         sbbq    .Lpoly+8(%rip),%r9
171         sbbq    $0,%r10
172         movq    %r11,%r12
173         sbbq    .Lpoly+24(%rip),%r11
174         testq   %r13,%r13
175
176         cmovzq  %rax,%r8
177         cmovzq  %rdx,%r9
178         movq    %r8,0(%rdi)
179         cmovzq  %rcx,%r10
180         movq    %r9,8(%rdi)
181         cmovzq  %r12,%r11
182         movq    %r10,16(%rdi)
183         movq    %r11,24(%rdi)
184
185         popq    %r13
186         popq    %r12
187         .byte   0xf3,0xc3
188 .size   ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
189
190
191
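# ecp_nistz256_add: res(%rdi) = a(%rsi) + b(%rdx) mod p.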
192 .globl  ecp_nistz256_add
193 .type   ecp_nistz256_add,@function
194 .align  32
195 ecp_nistz256_add:
196         pushq   %r12
197         pushq   %r13
198
199         movq    0(%rsi),%r8
200         xorq    %r13,%r13
201         movq    8(%rsi),%r9
202         movq    16(%rsi),%r10
203         movq    24(%rsi),%r11
204         leaq    .Lpoly(%rip),%rsi
205
206         addq    0(%rdx),%r8
207         adcq    8(%rdx),%r9
208         movq    %r8,%rax
209         adcq    16(%rdx),%r10
210         adcq    24(%rdx),%r11
211         movq    %r9,%rdx
212         adcq    $0,%r13
213
214         subq    0(%rsi),%r8
215         movq    %r10,%rcx
216         sbbq    8(%rsi),%r9
217         sbbq    16(%rsi),%r10
218         movq    %r11,%r12
219         sbbq    24(%rsi),%r11
220         testq   %r13,%r13
221
222         cmovzq  %rax,%r8
223         cmovzq  %rdx,%r9
224         movq    %r8,0(%rdi)
225         cmovzq  %rcx,%r10
226         movq    %r9,8(%rdi)
227         cmovzq  %r12,%r11
228         movq    %r10,16(%rdi)
229         movq    %r11,24(%rdi)
230
231         popq    %r13
232         popq    %r12
233         .byte   0xf3,0xc3
234 .size   ecp_nistz256_add,.-ecp_nistz256_add
235
236
237
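# ecp_nistz256_sub: res(%rdi) = a(%rsi) - b(%rdx) mod p
# (p is added back when the subtraction borrows).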
238 .globl  ecp_nistz256_sub
239 .type   ecp_nistz256_sub,@function
240 .align  32
241 ecp_nistz256_sub:
242         pushq   %r12
243         pushq   %r13
244
245         movq    0(%rsi),%r8
246         xorq    %r13,%r13
247         movq    8(%rsi),%r9
248         movq    16(%rsi),%r10
249         movq    24(%rsi),%r11
250         leaq    .Lpoly(%rip),%rsi
251
252         subq    0(%rdx),%r8
253         sbbq    8(%rdx),%r9
254         movq    %r8,%rax
255         sbbq    16(%rdx),%r10
256         sbbq    24(%rdx),%r11
257         movq    %r9,%rdx
258         sbbq    $0,%r13
259
260         addq    0(%rsi),%r8
261         movq    %r10,%rcx
262         adcq    8(%rsi),%r9
263         adcq    16(%rsi),%r10
264         movq    %r11,%r12
265         adcq    24(%rsi),%r11
266         testq   %r13,%r13
267
268         cmovzq  %rax,%r8
269         cmovzq  %rdx,%r9
270         movq    %r8,0(%rdi)
271         cmovzq  %rcx,%r10
272         movq    %r9,8(%rdi)
273         cmovzq  %r12,%r11
274         movq    %r10,16(%rdi)
275         movq    %r11,24(%rdi)
276
277         popq    %r13
278         popq    %r12
279         .byte   0xf3,0xc3
280 .size   ecp_nistz256_sub,.-ecp_nistz256_sub
281
282
283
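# ecp_nistz256_neg: res(%rdi) = -a(%rsi) mod p, computed as 0 - a with p
# added back on borrow.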
284 .globl  ecp_nistz256_neg
285 .type   ecp_nistz256_neg,@function
286 .align  32
287 ecp_nistz256_neg:
288         pushq   %r12
289         pushq   %r13
290
291         xorq    %r8,%r8
292         xorq    %r9,%r9
293         xorq    %r10,%r10
294         xorq    %r11,%r11
295         xorq    %r13,%r13
296
297         subq    0(%rsi),%r8
298         sbbq    8(%rsi),%r9
299         sbbq    16(%rsi),%r10
300         movq    %r8,%rax
301         sbbq    24(%rsi),%r11
302         leaq    .Lpoly(%rip),%rsi
303         movq    %r9,%rdx
304         sbbq    $0,%r13
305
306         addq    0(%rsi),%r8
307         movq    %r10,%rcx
308         adcq    8(%rsi),%r9
309         adcq    16(%rsi),%r10
310         movq    %r11,%r12
311         adcq    24(%rsi),%r11
312         testq   %r13,%r13
313
314         cmovzq  %rax,%r8
315         cmovzq  %rdx,%r9
316         movq    %r8,0(%rdi)
317         cmovzq  %rcx,%r10
318         movq    %r9,8(%rdi)
319         cmovzq  %r12,%r11
320         movq    %r10,16(%rdi)
321         movq    %r11,24(%rdi)
322
323         popq    %r13
324         popq    %r12
325         .byte   0xf3,0xc3
326 .size   ecp_nistz256_neg,.-ecp_nistz256_neg
327
328
329
330
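# ecp_nistz256_to_mont: res(%rdi) = a(%rsi)*2^256 mod p.
# Enters the Montgomery domain by multiplying by .LRR = 2^512 mod p and
# jumping into the shared .Lmul_mont body.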
331 .globl  ecp_nistz256_to_mont
332 .type   ecp_nistz256_to_mont,@function
333 .align  32
334 ecp_nistz256_to_mont:
335         movl    $0x80100,%ecx
336         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
337         leaq    .LRR(%rip),%rdx
338         jmp     .Lmul_mont
339 .size   ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
340
341
342
343
344
345
346
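# ecp_nistz256_mul_mont: res(%rdi) = a(%rsi)*b(%rdx)*2^-256 mod p
# (Montgomery multiplication).  The OPENSSL_ia32cap_P test (mask 0x80100,
# i.e. BMI2 and ADX) selects between the MULX/ADX core and the plain
# MULQ core.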
347 .globl  ecp_nistz256_mul_mont
348 .type   ecp_nistz256_mul_mont,@function
349 .align  32
350 ecp_nistz256_mul_mont:
351         movl    $0x80100,%ecx
352         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
353 .Lmul_mont:
354         pushq   %rbp
355         pushq   %rbx
356         pushq   %r12
357         pushq   %r13
358         pushq   %r14
359         pushq   %r15
360         cmpl    $0x80100,%ecx
361         je      .Lmul_montx
362         movq    %rdx,%rbx
363         movq    0(%rdx),%rax
364         movq    0(%rsi),%r9
365         movq    8(%rsi),%r10
366         movq    16(%rsi),%r11
367         movq    24(%rsi),%r12
368
369         call    __ecp_nistz256_mul_montq
370         jmp     .Lmul_mont_done
371
372 .align  32
373 .Lmul_montx:
374         movq    %rdx,%rbx
375         movq    0(%rdx),%rdx
376         movq    0(%rsi),%r9
377         movq    8(%rsi),%r10
378         movq    16(%rsi),%r11
379         movq    24(%rsi),%r12
380         leaq    -128(%rsi),%rsi
381
382         call    __ecp_nistz256_mul_montx
383 .Lmul_mont_done:
384         popq    %r15
385         popq    %r14
386         popq    %r13
387         popq    %r12
388         popq    %rbx
389         popq    %rbp
390         .byte   0xf3,0xc3
391 .size   ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
392
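# __ecp_nistz256_mul_montq: MULQ-based Montgomery multiplication core.
# On entry %rbx points at b, %rax holds b[0] and %r9..%r12 hold a[0..3];
# each multiplication pass is interleaved with one round of the
# P-256-specific reduction (shifts plus a single multiply by .Lpoly+24).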
393 .type   __ecp_nistz256_mul_montq,@function
394 .align  32
395 __ecp_nistz256_mul_montq:
396
397
398         movq    %rax,%rbp
399         mulq    %r9
400         movq    .Lpoly+8(%rip),%r14
401         movq    %rax,%r8
402         movq    %rbp,%rax
403         movq    %rdx,%r9
404
405         mulq    %r10
406         movq    .Lpoly+24(%rip),%r15
407         addq    %rax,%r9
408         movq    %rbp,%rax
409         adcq    $0,%rdx
410         movq    %rdx,%r10
411
412         mulq    %r11
413         addq    %rax,%r10
414         movq    %rbp,%rax
415         adcq    $0,%rdx
416         movq    %rdx,%r11
417
418         mulq    %r12
419         addq    %rax,%r11
420         movq    %r8,%rax
421         adcq    $0,%rdx
422         xorq    %r13,%r13
423         movq    %rdx,%r12
424
425
426
427
428
429
430
431
432
433
434         movq    %r8,%rbp
435         shlq    $32,%r8
436         mulq    %r15
437         shrq    $32,%rbp
438         addq    %r8,%r9
439         adcq    %rbp,%r10
440         adcq    %rax,%r11
441         movq    8(%rbx),%rax
442         adcq    %rdx,%r12
443         adcq    $0,%r13
444         xorq    %r8,%r8
445
446
447
448         movq    %rax,%rbp
449         mulq    0(%rsi)
450         addq    %rax,%r9
451         movq    %rbp,%rax
452         adcq    $0,%rdx
453         movq    %rdx,%rcx
454
455         mulq    8(%rsi)
456         addq    %rcx,%r10
457         adcq    $0,%rdx
458         addq    %rax,%r10
459         movq    %rbp,%rax
460         adcq    $0,%rdx
461         movq    %rdx,%rcx
462
463         mulq    16(%rsi)
464         addq    %rcx,%r11
465         adcq    $0,%rdx
466         addq    %rax,%r11
467         movq    %rbp,%rax
468         adcq    $0,%rdx
469         movq    %rdx,%rcx
470
471         mulq    24(%rsi)
472         addq    %rcx,%r12
473         adcq    $0,%rdx
474         addq    %rax,%r12
475         movq    %r9,%rax
476         adcq    %rdx,%r13
477         adcq    $0,%r8
478
479
480
481         movq    %r9,%rbp
482         shlq    $32,%r9
483         mulq    %r15
484         shrq    $32,%rbp
485         addq    %r9,%r10
486         adcq    %rbp,%r11
487         adcq    %rax,%r12
488         movq    16(%rbx),%rax
489         adcq    %rdx,%r13
490         adcq    $0,%r8
491         xorq    %r9,%r9
492
493
494
495         movq    %rax,%rbp
496         mulq    0(%rsi)
497         addq    %rax,%r10
498         movq    %rbp,%rax
499         adcq    $0,%rdx
500         movq    %rdx,%rcx
501
502         mulq    8(%rsi)
503         addq    %rcx,%r11
504         adcq    $0,%rdx
505         addq    %rax,%r11
506         movq    %rbp,%rax
507         adcq    $0,%rdx
508         movq    %rdx,%rcx
509
510         mulq    16(%rsi)
511         addq    %rcx,%r12
512         adcq    $0,%rdx
513         addq    %rax,%r12
514         movq    %rbp,%rax
515         adcq    $0,%rdx
516         movq    %rdx,%rcx
517
518         mulq    24(%rsi)
519         addq    %rcx,%r13
520         adcq    $0,%rdx
521         addq    %rax,%r13
522         movq    %r10,%rax
523         adcq    %rdx,%r8
524         adcq    $0,%r9
525
526
527
528         movq    %r10,%rbp
529         shlq    $32,%r10
530         mulq    %r15
531         shrq    $32,%rbp
532         addq    %r10,%r11
533         adcq    %rbp,%r12
534         adcq    %rax,%r13
535         movq    24(%rbx),%rax
536         adcq    %rdx,%r8
537         adcq    $0,%r9
538         xorq    %r10,%r10
539
540
541
542         movq    %rax,%rbp
543         mulq    0(%rsi)
544         addq    %rax,%r11
545         movq    %rbp,%rax
546         adcq    $0,%rdx
547         movq    %rdx,%rcx
548
549         mulq    8(%rsi)
550         addq    %rcx,%r12
551         adcq    $0,%rdx
552         addq    %rax,%r12
553         movq    %rbp,%rax
554         adcq    $0,%rdx
555         movq    %rdx,%rcx
556
557         mulq    16(%rsi)
558         addq    %rcx,%r13
559         adcq    $0,%rdx
560         addq    %rax,%r13
561         movq    %rbp,%rax
562         adcq    $0,%rdx
563         movq    %rdx,%rcx
564
565         mulq    24(%rsi)
566         addq    %rcx,%r8
567         adcq    $0,%rdx
568         addq    %rax,%r8
569         movq    %r11,%rax
570         adcq    %rdx,%r9
571         adcq    $0,%r10
572
573
574
575         movq    %r11,%rbp
576         shlq    $32,%r11
577         mulq    %r15
578         shrq    $32,%rbp
579         addq    %r11,%r12
580         adcq    %rbp,%r13
581         movq    %r12,%rcx
582         adcq    %rax,%r8
583         adcq    %rdx,%r9
584         movq    %r13,%rbp
585         adcq    $0,%r10
586
587
588
589         subq    $-1,%r12
590         movq    %r8,%rbx
591         sbbq    %r14,%r13
592         sbbq    $0,%r8
593         movq    %r9,%rdx
594         sbbq    %r15,%r9
595         sbbq    $0,%r10
596
597         cmovcq  %rcx,%r12
598         cmovcq  %rbp,%r13
599         movq    %r12,0(%rdi)
600         cmovcq  %rbx,%r8
601         movq    %r13,8(%rdi)
602         cmovcq  %rdx,%r9
603         movq    %r8,16(%rdi)
604         movq    %r9,24(%rdi)
605
606         .byte   0xf3,0xc3
607 .size   __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
608
609
610
611
612
613
614
615
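# ecp_nistz256_sqr_mont: res(%rdi) = a(%rsi)^2*2^-256 mod p (Montgomery
# squaring), with the same BMI2/ADX dispatch as ecp_nistz256_mul_mont.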
616 .globl  ecp_nistz256_sqr_mont
617 .type   ecp_nistz256_sqr_mont,@function
618 .align  32
619 ecp_nistz256_sqr_mont:
620         movl    $0x80100,%ecx
621         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
622         pushq   %rbp
623         pushq   %rbx
624         pushq   %r12
625         pushq   %r13
626         pushq   %r14
627         pushq   %r15
628         cmpl    $0x80100,%ecx
629         je      .Lsqr_montx
630         movq    0(%rsi),%rax
631         movq    8(%rsi),%r14
632         movq    16(%rsi),%r15
633         movq    24(%rsi),%r8
634
635         call    __ecp_nistz256_sqr_montq
636         jmp     .Lsqr_mont_done
637
638 .align  32
639 .Lsqr_montx:
640         movq    0(%rsi),%rdx
641         movq    8(%rsi),%r14
642         movq    16(%rsi),%r15
643         movq    24(%rsi),%r8
644         leaq    -128(%rsi),%rsi
645
646         call    __ecp_nistz256_sqr_montx
647 .Lsqr_mont_done:
648         popq    %r15
649         popq    %r14
650         popq    %r13
651         popq    %r12
652         popq    %rbx
653         popq    %rbp
654         .byte   0xf3,0xc3
655 .size   ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
656
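# __ecp_nistz256_sqr_montq: MULQ-based Montgomery squaring core.
# Forms the off-diagonal products once, doubles them, adds the squared
# limbs, then runs four rounds of the P-256 reduction.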
657 .type   __ecp_nistz256_sqr_montq,@function
658 .align  32
659 __ecp_nistz256_sqr_montq:
660         movq    %rax,%r13
661         mulq    %r14
662         movq    %rax,%r9
663         movq    %r15,%rax
664         movq    %rdx,%r10
665
666         mulq    %r13
667         addq    %rax,%r10
668         movq    %r8,%rax
669         adcq    $0,%rdx
670         movq    %rdx,%r11
671
672         mulq    %r13
673         addq    %rax,%r11
674         movq    %r15,%rax
675         adcq    $0,%rdx
676         movq    %rdx,%r12
677
678
679         mulq    %r14
680         addq    %rax,%r11
681         movq    %r8,%rax
682         adcq    $0,%rdx
683         movq    %rdx,%rbp
684
685         mulq    %r14
686         addq    %rax,%r12
687         movq    %r8,%rax
688         adcq    $0,%rdx
689         addq    %rbp,%r12
690         movq    %rdx,%r13
691         adcq    $0,%r13
692
693
694         mulq    %r15
695         xorq    %r15,%r15
696         addq    %rax,%r13
697         movq    0(%rsi),%rax
698         movq    %rdx,%r14
699         adcq    $0,%r14
700
701         addq    %r9,%r9
702         adcq    %r10,%r10
703         adcq    %r11,%r11
704         adcq    %r12,%r12
705         adcq    %r13,%r13
706         adcq    %r14,%r14
707         adcq    $0,%r15
708
709         mulq    %rax
710         movq    %rax,%r8
711         movq    8(%rsi),%rax
712         movq    %rdx,%rcx
713
714         mulq    %rax
715         addq    %rcx,%r9
716         adcq    %rax,%r10
717         movq    16(%rsi),%rax
718         adcq    $0,%rdx
719         movq    %rdx,%rcx
720
721         mulq    %rax
722         addq    %rcx,%r11
723         adcq    %rax,%r12
724         movq    24(%rsi),%rax
725         adcq    $0,%rdx
726         movq    %rdx,%rcx
727
728         mulq    %rax
729         addq    %rcx,%r13
730         adcq    %rax,%r14
731         movq    %r8,%rax
732         adcq    %rdx,%r15
733
734         movq    .Lpoly+8(%rip),%rsi
735         movq    .Lpoly+24(%rip),%rbp
736
737
738
739
740         movq    %r8,%rcx
741         shlq    $32,%r8
742         mulq    %rbp
743         shrq    $32,%rcx
744         addq    %r8,%r9
745         adcq    %rcx,%r10
746         adcq    %rax,%r11
747         movq    %r9,%rax
748         adcq    $0,%rdx
749
750
751
752         movq    %r9,%rcx
753         shlq    $32,%r9
754         movq    %rdx,%r8
755         mulq    %rbp
756         shrq    $32,%rcx
757         addq    %r9,%r10
758         adcq    %rcx,%r11
759         adcq    %rax,%r8
760         movq    %r10,%rax
761         adcq    $0,%rdx
762
763
764
765         movq    %r10,%rcx
766         shlq    $32,%r10
767         movq    %rdx,%r9
768         mulq    %rbp
769         shrq    $32,%rcx
770         addq    %r10,%r11
771         adcq    %rcx,%r8
772         adcq    %rax,%r9
773         movq    %r11,%rax
774         adcq    $0,%rdx
775
776
777
778         movq    %r11,%rcx
779         shlq    $32,%r11
780         movq    %rdx,%r10
781         mulq    %rbp
782         shrq    $32,%rcx
783         addq    %r11,%r8
784         adcq    %rcx,%r9
785         adcq    %rax,%r10
786         adcq    $0,%rdx
787         xorq    %r11,%r11
788
789
790
791         addq    %r8,%r12
792         adcq    %r9,%r13
793         movq    %r12,%r8
794         adcq    %r10,%r14
795         adcq    %rdx,%r15
796         movq    %r13,%r9
797         adcq    $0,%r11
798
799         subq    $-1,%r12
800         movq    %r14,%r10
801         sbbq    %rsi,%r13
802         sbbq    $0,%r14
803         movq    %r15,%rcx
804         sbbq    %rbp,%r15
805         sbbq    $0,%r11
806
807         cmovcq  %r8,%r12
808         cmovcq  %r9,%r13
809         movq    %r12,0(%rdi)
810         cmovcq  %r10,%r14
811         movq    %r13,8(%rdi)
812         cmovcq  %rcx,%r15
813         movq    %r14,16(%rdi)
814         movq    %r15,24(%rdi)
815
816         .byte   0xf3,0xc3
817 .size   __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
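# __ecp_nistz256_mul_montx: Montgomery multiplication core for BMI2/ADX
# CPUs, using MULX with the ADCX/ADOX dual carry chains.  Note that the
# a pointer arrives biased by -128, which the 128(%rsi) displacements
# undo.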
818 .type   __ecp_nistz256_mul_montx,@function
819 .align  32
820 __ecp_nistz256_mul_montx:
821
822
823         mulxq   %r9,%r8,%r9
824         mulxq   %r10,%rcx,%r10
825         movq    $32,%r14
826         xorq    %r13,%r13
827         mulxq   %r11,%rbp,%r11
828         movq    .Lpoly+24(%rip),%r15
829         adcq    %rcx,%r9
830         mulxq   %r12,%rcx,%r12
831         movq    %r8,%rdx
832         adcq    %rbp,%r10
833         shlxq   %r14,%r8,%rbp
834         adcq    %rcx,%r11
835         shrxq   %r14,%r8,%rcx
836         adcq    $0,%r12
837
838
839
840         addq    %rbp,%r9
841         adcq    %rcx,%r10
842
843         mulxq   %r15,%rcx,%rbp
844         movq    8(%rbx),%rdx
845         adcq    %rcx,%r11
846         adcq    %rbp,%r12
847         adcq    $0,%r13
848         xorq    %r8,%r8
849
850
851
852         mulxq   0+128(%rsi),%rcx,%rbp
853         adcxq   %rcx,%r9
854         adoxq   %rbp,%r10
855
856         mulxq   8+128(%rsi),%rcx,%rbp
857         adcxq   %rcx,%r10
858         adoxq   %rbp,%r11
859
860         mulxq   16+128(%rsi),%rcx,%rbp
861         adcxq   %rcx,%r11
862         adoxq   %rbp,%r12
863
864         mulxq   24+128(%rsi),%rcx,%rbp
865         movq    %r9,%rdx
866         adcxq   %rcx,%r12
867         shlxq   %r14,%r9,%rcx
868         adoxq   %rbp,%r13
869         shrxq   %r14,%r9,%rbp
870
871         adcxq   %r8,%r13
872         adoxq   %r8,%r8
873         adcq    $0,%r8
874
875
876
877         addq    %rcx,%r10
878         adcq    %rbp,%r11
879
880         mulxq   %r15,%rcx,%rbp
881         movq    16(%rbx),%rdx
882         adcq    %rcx,%r12
883         adcq    %rbp,%r13
884         adcq    $0,%r8
885         xorq    %r9,%r9
886
887
888
889         mulxq   0+128(%rsi),%rcx,%rbp
890         adcxq   %rcx,%r10
891         adoxq   %rbp,%r11
892
893         mulxq   8+128(%rsi),%rcx,%rbp
894         adcxq   %rcx,%r11
895         adoxq   %rbp,%r12
896
897         mulxq   16+128(%rsi),%rcx,%rbp
898         adcxq   %rcx,%r12
899         adoxq   %rbp,%r13
900
901         mulxq   24+128(%rsi),%rcx,%rbp
902         movq    %r10,%rdx
903         adcxq   %rcx,%r13
904         shlxq   %r14,%r10,%rcx
905         adoxq   %rbp,%r8
906         shrxq   %r14,%r10,%rbp
907
908         adcxq   %r9,%r8
909         adoxq   %r9,%r9
910         adcq    $0,%r9
911
912
913
914         addq    %rcx,%r11
915         adcq    %rbp,%r12
916
917         mulxq   %r15,%rcx,%rbp
918         movq    24(%rbx),%rdx
919         adcq    %rcx,%r13
920         adcq    %rbp,%r8
921         adcq    $0,%r9
922         xorq    %r10,%r10
923
924
925
926         mulxq   0+128(%rsi),%rcx,%rbp
927         adcxq   %rcx,%r11
928         adoxq   %rbp,%r12
929
930         mulxq   8+128(%rsi),%rcx,%rbp
931         adcxq   %rcx,%r12
932         adoxq   %rbp,%r13
933
934         mulxq   16+128(%rsi),%rcx,%rbp
935         adcxq   %rcx,%r13
936         adoxq   %rbp,%r8
937
938         mulxq   24+128(%rsi),%rcx,%rbp
939         movq    %r11,%rdx
940         adcxq   %rcx,%r8
941         shlxq   %r14,%r11,%rcx
942         adoxq   %rbp,%r9
943         shrxq   %r14,%r11,%rbp
944
945         adcxq   %r10,%r9
946         adoxq   %r10,%r10
947         adcq    $0,%r10
948
949
950
951         addq    %rcx,%r12
952         adcq    %rbp,%r13
953
954         mulxq   %r15,%rcx,%rbp
955         movq    %r12,%rbx
956         movq    .Lpoly+8(%rip),%r14
957         adcq    %rcx,%r8
958         movq    %r13,%rdx
959         adcq    %rbp,%r9
960         adcq    $0,%r10
961
962
963
964         xorl    %eax,%eax
965         movq    %r8,%rcx
966         sbbq    $-1,%r12
967         sbbq    %r14,%r13
968         sbbq    $0,%r8
969         movq    %r9,%rbp
970         sbbq    %r15,%r9
971         sbbq    $0,%r10
972
973         cmovcq  %rbx,%r12
974         cmovcq  %rdx,%r13
975         movq    %r12,0(%rdi)
976         cmovcq  %rcx,%r8
977         movq    %r13,8(%rdi)
978         cmovcq  %rbp,%r9
979         movq    %r8,16(%rdi)
980         movq    %r9,24(%rdi)
981
982         .byte   0xf3,0xc3
983 .size   __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
984
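# __ecp_nistz256_sqr_montx: BMI2/ADX Montgomery squaring core, the
# MULX/ADCX/ADOX counterpart of __ecp_nistz256_sqr_montq (the a pointer
# is biased by -128 here as well).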
985 .type   __ecp_nistz256_sqr_montx,@function
986 .align  32
987 __ecp_nistz256_sqr_montx:
988         mulxq   %r14,%r9,%r10
989         mulxq   %r15,%rcx,%r11
990         xorl    %eax,%eax
991         adcq    %rcx,%r10
992         mulxq   %r8,%rbp,%r12
993         movq    %r14,%rdx
994         adcq    %rbp,%r11
995         adcq    $0,%r12
996         xorq    %r13,%r13
997
998
999         mulxq   %r15,%rcx,%rbp
1000         adcxq   %rcx,%r11
1001         adoxq   %rbp,%r12
1002
1003         mulxq   %r8,%rcx,%rbp
1004         movq    %r15,%rdx
1005         adcxq   %rcx,%r12
1006         adoxq   %rbp,%r13
1007         adcq    $0,%r13
1008
1009
1010         mulxq   %r8,%rcx,%r14
1011         movq    0+128(%rsi),%rdx
1012         xorq    %r15,%r15
1013         adcxq   %r9,%r9
1014         adoxq   %rcx,%r13
1015         adcxq   %r10,%r10
1016         adoxq   %r15,%r14
1017
1018         mulxq   %rdx,%r8,%rbp
1019         movq    8+128(%rsi),%rdx
1020         adcxq   %r11,%r11
1021         adoxq   %rbp,%r9
1022         adcxq   %r12,%r12
1023         mulxq   %rdx,%rcx,%rax
1024         movq    16+128(%rsi),%rdx
1025         adcxq   %r13,%r13
1026         adoxq   %rcx,%r10
1027         adcxq   %r14,%r14
1028 .byte   0x67
1029         mulxq   %rdx,%rcx,%rbp
1030         movq    24+128(%rsi),%rdx
1031         adoxq   %rax,%r11
1032         adcxq   %r15,%r15
1033         adoxq   %rcx,%r12
1034         movq    $32,%rsi
1035         adoxq   %rbp,%r13
1036 .byte   0x67,0x67
1037         mulxq   %rdx,%rcx,%rax
1038         movq    %r8,%rdx
1039         adoxq   %rcx,%r14
1040         shlxq   %rsi,%r8,%rcx
1041         adoxq   %rax,%r15
1042         shrxq   %rsi,%r8,%rax
1043         movq    .Lpoly+24(%rip),%rbp
1044
1045
1046         addq    %rcx,%r9
1047         adcq    %rax,%r10
1048
1049         mulxq   %rbp,%rcx,%r8
1050         movq    %r9,%rdx
1051         adcq    %rcx,%r11
1052         shlxq   %rsi,%r9,%rcx
1053         adcq    $0,%r8
1054         shrxq   %rsi,%r9,%rax
1055
1056
1057         addq    %rcx,%r10
1058         adcq    %rax,%r11
1059
1060         mulxq   %rbp,%rcx,%r9
1061         movq    %r10,%rdx
1062         adcq    %rcx,%r8
1063         shlxq   %rsi,%r10,%rcx
1064         adcq    $0,%r9
1065         shrxq   %rsi,%r10,%rax
1066
1067
1068         addq    %rcx,%r11
1069         adcq    %rax,%r8
1070
1071         mulxq   %rbp,%rcx,%r10
1072         movq    %r11,%rdx
1073         adcq    %rcx,%r9
1074         shlxq   %rsi,%r11,%rcx
1075         adcq    $0,%r10
1076         shrxq   %rsi,%r11,%rax
1077
1078
1079         addq    %rcx,%r8
1080         adcq    %rax,%r9
1081
1082         mulxq   %rbp,%rcx,%r11
1083         adcq    %rcx,%r10
1084         adcq    $0,%r11
1085
1086         xorq    %rdx,%rdx
1087         adcq    %r8,%r12
1088         movq    .Lpoly+8(%rip),%rsi
1089         adcq    %r9,%r13
1090         movq    %r12,%r8
1091         adcq    %r10,%r14
1092         adcq    %r11,%r15
1093         movq    %r13,%r9
1094         adcq    $0,%rdx
1095
1096         xorl    %eax,%eax
1097         sbbq    $-1,%r12
1098         movq    %r14,%r10
1099         sbbq    %rsi,%r13
1100         sbbq    $0,%r14
1101         movq    %r15,%r11
1102         sbbq    %rbp,%r15
1103         sbbq    $0,%rdx
1104
1105         cmovcq  %r8,%r12
1106         cmovcq  %r9,%r13
1107         movq    %r12,0(%rdi)
1108         cmovcq  %r10,%r14
1109         movq    %r13,8(%rdi)
1110         cmovcq  %r11,%r15
1111         movq    %r14,16(%rdi)
1112         movq    %r15,24(%rdi)
1113
1114         .byte   0xf3,0xc3
1115 .size   __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
1116
1117
1118
1119
1120
1121
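# ecp_nistz256_from_mont: res(%rdi) = a(%rsi)*2^-256 mod p.
# Leaves the Montgomery domain by reducing a on its own, i.e. a
# Montgomery multiplication by 1.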
1122 .globl  ecp_nistz256_from_mont
1123 .type   ecp_nistz256_from_mont,@function
1124 .align  32
1125 ecp_nistz256_from_mont:
1126         pushq   %r12
1127         pushq   %r13
1128
1129         movq    0(%rsi),%rax
1130         movq    .Lpoly+24(%rip),%r13
1131         movq    8(%rsi),%r9
1132         movq    16(%rsi),%r10
1133         movq    24(%rsi),%r11
1134         movq    %rax,%r8
1135         movq    .Lpoly+8(%rip),%r12
1136
1137
1138
1139         movq    %rax,%rcx
1140         shlq    $32,%r8
1141         mulq    %r13
1142         shrq    $32,%rcx
1143         addq    %r8,%r9
1144         adcq    %rcx,%r10
1145         adcq    %rax,%r11
1146         movq    %r9,%rax
1147         adcq    $0,%rdx
1148
1149
1150
1151         movq    %r9,%rcx
1152         shlq    $32,%r9
1153         movq    %rdx,%r8
1154         mulq    %r13
1155         shrq    $32,%rcx
1156         addq    %r9,%r10
1157         adcq    %rcx,%r11
1158         adcq    %rax,%r8
1159         movq    %r10,%rax
1160         adcq    $0,%rdx
1161
1162
1163
1164         movq    %r10,%rcx
1165         shlq    $32,%r10
1166         movq    %rdx,%r9
1167         mulq    %r13
1168         shrq    $32,%rcx
1169         addq    %r10,%r11
1170         adcq    %rcx,%r8
1171         adcq    %rax,%r9
1172         movq    %r11,%rax
1173         adcq    $0,%rdx
1174
1175
1176
1177         movq    %r11,%rcx
1178         shlq    $32,%r11
1179         movq    %rdx,%r10
1180         mulq    %r13
1181         shrq    $32,%rcx
1182         addq    %r11,%r8
1183         adcq    %rcx,%r9
1184         movq    %r8,%rcx
1185         adcq    %rax,%r10
1186         movq    %r9,%rsi
1187         adcq    $0,%rdx
1188
1189
1190
1191         subq    $-1,%r8
1192         movq    %r10,%rax
1193         sbbq    %r12,%r9
1194         sbbq    $0,%r10
1195         movq    %rdx,%r11
1196         sbbq    %r13,%rdx
1197         sbbq    %r13,%r13
1198
1199         cmovnzq %rcx,%r8
1200         cmovnzq %rsi,%r9
1201         movq    %r8,0(%rdi)
1202         cmovnzq %rax,%r10
1203         movq    %r9,8(%rdi)
1204         cmovzq  %rdx,%r11
1205         movq    %r10,16(%rdi)
1206         movq    %r11,24(%rdi)
1207
1208         popq    %r13
1209         popq    %r12
1210         .byte   0xf3,0xc3
1211 .size   ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
1212
1213
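# ecp_nistz256_select_w5: constant-time lookup in a window-5 point table.
# Scans all 16 projective points (96 bytes each) at %rsi and masks in the
# one whose index equals %edx, writing it to %rdi; branches to the AVX2
# version when OPENSSL_ia32cap_P advertises AVX2.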
1214 .globl  ecp_nistz256_select_w5
1215 .type   ecp_nistz256_select_w5,@function
1216 .align  32
1217 ecp_nistz256_select_w5:
1218         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1219         testl   $32,%eax
1220         jnz     .Lavx2_select_w5
1221         movdqa  .LOne(%rip),%xmm0
1222         movd    %edx,%xmm1
1223
1224         pxor    %xmm2,%xmm2
1225         pxor    %xmm3,%xmm3
1226         pxor    %xmm4,%xmm4
1227         pxor    %xmm5,%xmm5
1228         pxor    %xmm6,%xmm6
1229         pxor    %xmm7,%xmm7
1230
1231         movdqa  %xmm0,%xmm8
1232         pshufd  $0,%xmm1,%xmm1
1233
1234         movq    $16,%rax
1235 .Lselect_loop_sse_w5:
1236
1237         movdqa  %xmm8,%xmm15
1238         paddd   %xmm0,%xmm8
1239         pcmpeqd %xmm1,%xmm15
1240
1241         movdqa  0(%rsi),%xmm9
1242         movdqa  16(%rsi),%xmm10
1243         movdqa  32(%rsi),%xmm11
1244         movdqa  48(%rsi),%xmm12
1245         movdqa  64(%rsi),%xmm13
1246         movdqa  80(%rsi),%xmm14
1247         leaq    96(%rsi),%rsi
1248
1249         pand    %xmm15,%xmm9
1250         pand    %xmm15,%xmm10
1251         por     %xmm9,%xmm2
1252         pand    %xmm15,%xmm11
1253         por     %xmm10,%xmm3
1254         pand    %xmm15,%xmm12
1255         por     %xmm11,%xmm4
1256         pand    %xmm15,%xmm13
1257         por     %xmm12,%xmm5
1258         pand    %xmm15,%xmm14
1259         por     %xmm13,%xmm6
1260         por     %xmm14,%xmm7
1261
1262         decq    %rax
1263         jnz     .Lselect_loop_sse_w5
1264
1265         movdqu  %xmm2,0(%rdi)
1266         movdqu  %xmm3,16(%rdi)
1267         movdqu  %xmm4,32(%rdi)
1268         movdqu  %xmm5,48(%rdi)
1269         movdqu  %xmm6,64(%rdi)
1270         movdqu  %xmm7,80(%rdi)
1271         .byte   0xf3,0xc3
1272 .size   ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
1273
1274
1275
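# ecp_nistz256_select_w7: constant-time lookup in the window-7 table of
# 64 affine points (64 bytes each), using the same scan-and-mask scheme.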
1276 .globl  ecp_nistz256_select_w7
1277 .type   ecp_nistz256_select_w7,@function
1278 .align  32
1279 ecp_nistz256_select_w7:
1280         movl    OPENSSL_ia32cap_P+8(%rip),%eax
1281         testl   $32,%eax
1282         jnz     .Lavx2_select_w7
1283         movdqa  .LOne(%rip),%xmm8
1284         movd    %edx,%xmm1
1285
1286         pxor    %xmm2,%xmm2
1287         pxor    %xmm3,%xmm3
1288         pxor    %xmm4,%xmm4
1289         pxor    %xmm5,%xmm5
1290
1291         movdqa  %xmm8,%xmm0
1292         pshufd  $0,%xmm1,%xmm1
1293         movq    $64,%rax
1294
1295 .Lselect_loop_sse_w7:
1296         movdqa  %xmm8,%xmm15
1297         paddd   %xmm0,%xmm8
1298         movdqa  0(%rsi),%xmm9
1299         movdqa  16(%rsi),%xmm10
1300         pcmpeqd %xmm1,%xmm15
1301         movdqa  32(%rsi),%xmm11
1302         movdqa  48(%rsi),%xmm12
1303         leaq    64(%rsi),%rsi
1304
1305         pand    %xmm15,%xmm9
1306         pand    %xmm15,%xmm10
1307         por     %xmm9,%xmm2
1308         pand    %xmm15,%xmm11
1309         por     %xmm10,%xmm3
1310         pand    %xmm15,%xmm12
1311         por     %xmm11,%xmm4
1312         prefetcht0      255(%rsi)
1313         por     %xmm12,%xmm5
1314
1315         decq    %rax
1316         jnz     .Lselect_loop_sse_w7
1317
1318         movdqu  %xmm2,0(%rdi)
1319         movdqu  %xmm3,16(%rdi)
1320         movdqu  %xmm4,32(%rdi)
1321         movdqu  %xmm5,48(%rdi)
1322         .byte   0xf3,0xc3
1323 .size   ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
1324
1325
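# ecp_nistz256_avx2_select_w5: AVX2 flavour of the w5 lookup, comparing
# two table entries per iteration (8 iterations over 192-byte strides).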
1326 .type   ecp_nistz256_avx2_select_w5,@function
1327 .align  32
1328 ecp_nistz256_avx2_select_w5:
1329 .Lavx2_select_w5:
1330         vzeroupper
1331         vmovdqa .LTwo(%rip),%ymm0
1332
1333         vpxor   %ymm2,%ymm2,%ymm2
1334         vpxor   %ymm3,%ymm3,%ymm3
1335         vpxor   %ymm4,%ymm4,%ymm4
1336
1337         vmovdqa .LOne(%rip),%ymm5
1338         vmovdqa .LTwo(%rip),%ymm10
1339
1340         vmovd   %edx,%xmm1
1341         vpermd  %ymm1,%ymm2,%ymm1
1342
1343         movq    $8,%rax
1344 .Lselect_loop_avx2_w5:
1345
1346         vmovdqa 0(%rsi),%ymm6
1347         vmovdqa 32(%rsi),%ymm7
1348         vmovdqa 64(%rsi),%ymm8
1349
1350         vmovdqa 96(%rsi),%ymm11
1351         vmovdqa 128(%rsi),%ymm12
1352         vmovdqa 160(%rsi),%ymm13
1353
1354         vpcmpeqd        %ymm1,%ymm5,%ymm9
1355         vpcmpeqd        %ymm1,%ymm10,%ymm14
1356
1357         vpaddd  %ymm0,%ymm5,%ymm5
1358         vpaddd  %ymm0,%ymm10,%ymm10
1359         leaq    192(%rsi),%rsi
1360
1361         vpand   %ymm9,%ymm6,%ymm6
1362         vpand   %ymm9,%ymm7,%ymm7
1363         vpand   %ymm9,%ymm8,%ymm8
1364         vpand   %ymm14,%ymm11,%ymm11
1365         vpand   %ymm14,%ymm12,%ymm12
1366         vpand   %ymm14,%ymm13,%ymm13
1367
1368         vpxor   %ymm6,%ymm2,%ymm2
1369         vpxor   %ymm7,%ymm3,%ymm3
1370         vpxor   %ymm8,%ymm4,%ymm4
1371         vpxor   %ymm11,%ymm2,%ymm2
1372         vpxor   %ymm12,%ymm3,%ymm3
1373         vpxor   %ymm13,%ymm4,%ymm4
1374
1375         decq    %rax
1376         jnz     .Lselect_loop_avx2_w5
1377
1378         vmovdqu %ymm2,0(%rdi)
1379         vmovdqu %ymm3,32(%rdi)
1380         vmovdqu %ymm4,64(%rdi)
1381         vzeroupper
1382         .byte   0xf3,0xc3
1383 .size   ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
1384
1385
1386
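# ecp_nistz256_avx2_select_w7: AVX2 flavour of the w7 lookup, covering
# three entries per iteration (21 iterations plus a single-entry tail).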
1387 .globl  ecp_nistz256_avx2_select_w7
1388 .type   ecp_nistz256_avx2_select_w7,@function
1389 .align  32
1390 ecp_nistz256_avx2_select_w7:
1391 .Lavx2_select_w7:
1392         vzeroupper
1393         vmovdqa .LThree(%rip),%ymm0
1394
1395         vpxor   %ymm2,%ymm2,%ymm2
1396         vpxor   %ymm3,%ymm3,%ymm3
1397
1398         vmovdqa .LOne(%rip),%ymm4
1399         vmovdqa .LTwo(%rip),%ymm8
1400         vmovdqa .LThree(%rip),%ymm12
1401
1402         vmovd   %edx,%xmm1
1403         vpermd  %ymm1,%ymm2,%ymm1
1404
1405
1406         movq    $21,%rax
1407 .Lselect_loop_avx2_w7:
1408
1409         vmovdqa 0(%rsi),%ymm5
1410         vmovdqa 32(%rsi),%ymm6
1411
1412         vmovdqa 64(%rsi),%ymm9
1413         vmovdqa 96(%rsi),%ymm10
1414
1415         vmovdqa 128(%rsi),%ymm13
1416         vmovdqa 160(%rsi),%ymm14
1417
1418         vpcmpeqd        %ymm1,%ymm4,%ymm7
1419         vpcmpeqd        %ymm1,%ymm8,%ymm11
1420         vpcmpeqd        %ymm1,%ymm12,%ymm15
1421
1422         vpaddd  %ymm0,%ymm4,%ymm4
1423         vpaddd  %ymm0,%ymm8,%ymm8
1424         vpaddd  %ymm0,%ymm12,%ymm12
1425         leaq    192(%rsi),%rsi
1426
1427         vpand   %ymm7,%ymm5,%ymm5
1428         vpand   %ymm7,%ymm6,%ymm6
1429         vpand   %ymm11,%ymm9,%ymm9
1430         vpand   %ymm11,%ymm10,%ymm10
1431         vpand   %ymm15,%ymm13,%ymm13
1432         vpand   %ymm15,%ymm14,%ymm14
1433
1434         vpxor   %ymm5,%ymm2,%ymm2
1435         vpxor   %ymm6,%ymm3,%ymm3
1436         vpxor   %ymm9,%ymm2,%ymm2
1437         vpxor   %ymm10,%ymm3,%ymm3
1438         vpxor   %ymm13,%ymm2,%ymm2
1439         vpxor   %ymm14,%ymm3,%ymm3
1440
1441         decq    %rax
1442         jnz     .Lselect_loop_avx2_w7
1443
1444
1445         vmovdqa 0(%rsi),%ymm5
1446         vmovdqa 32(%rsi),%ymm6
1447
1448         vpcmpeqd        %ymm1,%ymm4,%ymm7
1449
1450         vpand   %ymm7,%ymm5,%ymm5
1451         vpand   %ymm7,%ymm6,%ymm6
1452
1453         vpxor   %ymm5,%ymm2,%ymm2
1454         vpxor   %ymm6,%ymm3,%ymm3
1455
1456         vmovdqu %ymm2,0(%rdi)
1457         vmovdqu %ymm3,32(%rdi)
1458         vzeroupper
1459         .byte   0xf3,0xc3
1460 .size   ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
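# __ecp_nistz256_add_toq: internal helper.  (%r12,%r13,%r8,%r9) += value
# at %rbx, reduced mod p (callers keep .Lpoly+8/.Lpoly+24 in %r14/%r15);
# the sum is stored at %rdi and left in the registers.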
1461 .type   __ecp_nistz256_add_toq,@function
1462 .align  32
1463 __ecp_nistz256_add_toq:
1464         addq    0(%rbx),%r12
1465         adcq    8(%rbx),%r13
1466         movq    %r12,%rax
1467         adcq    16(%rbx),%r8
1468         adcq    24(%rbx),%r9
1469         movq    %r13,%rbp
1470         sbbq    %r11,%r11
1471
1472         subq    $-1,%r12
1473         movq    %r8,%rcx
1474         sbbq    %r14,%r13
1475         sbbq    $0,%r8
1476         movq    %r9,%r10
1477         sbbq    %r15,%r9
1478         testq   %r11,%r11
1479
1480         cmovzq  %rax,%r12
1481         cmovzq  %rbp,%r13
1482         movq    %r12,0(%rdi)
1483         cmovzq  %rcx,%r8
1484         movq    %r13,8(%rdi)
1485         cmovzq  %r10,%r9
1486         movq    %r8,16(%rdi)
1487         movq    %r9,24(%rdi)
1488
1489         .byte   0xf3,0xc3
1490 .size   __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
1491
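# __ecp_nistz256_sub_fromq: internal helper.  (%r12,%r13,%r8,%r9) -=
# value at %rbx, reduced mod p, stored at %rdi.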
1492 .type   __ecp_nistz256_sub_fromq,@function
1493 .align  32
1494 __ecp_nistz256_sub_fromq:
1495         subq    0(%rbx),%r12
1496         sbbq    8(%rbx),%r13
1497         movq    %r12,%rax
1498         sbbq    16(%rbx),%r8
1499         sbbq    24(%rbx),%r9
1500         movq    %r13,%rbp
1501         sbbq    %r11,%r11
1502
1503         addq    $-1,%r12
1504         movq    %r8,%rcx
1505         adcq    %r14,%r13
1506         adcq    $0,%r8
1507         movq    %r9,%r10
1508         adcq    %r15,%r9
1509         testq   %r11,%r11
1510
1511         cmovzq  %rax,%r12
1512         cmovzq  %rbp,%r13
1513         movq    %r12,0(%rdi)
1514         cmovzq  %rcx,%r8
1515         movq    %r13,8(%rdi)
1516         cmovzq  %r10,%r9
1517         movq    %r8,16(%rdi)
1518         movq    %r9,24(%rdi)
1519
1520         .byte   0xf3,0xc3
1521 .size   __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
1522
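# __ecp_nistz256_subq: internal helper.  (%r12,%r13,%r8,%r9) =
# (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9) mod p; the result is left
# in registers only, nothing is stored.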
1523 .type   __ecp_nistz256_subq,@function
1524 .align  32
1525 __ecp_nistz256_subq:
1526         subq    %r12,%rax
1527         sbbq    %r13,%rbp
1528         movq    %rax,%r12
1529         sbbq    %r8,%rcx
1530         sbbq    %r9,%r10
1531         movq    %rbp,%r13
1532         sbbq    %r11,%r11
1533
1534         addq    $-1,%rax
1535         movq    %rcx,%r8
1536         adcq    %r14,%rbp
1537         adcq    $0,%rcx
1538         movq    %r10,%r9
1539         adcq    %r15,%r10
1540         testq   %r11,%r11
1541
1542         cmovnzq %rax,%r12
1543         cmovnzq %rbp,%r13
1544         cmovnzq %rcx,%r8
1545         cmovnzq %r10,%r9
1546
1547         .byte   0xf3,0xc3
1548 .size   __ecp_nistz256_subq,.-__ecp_nistz256_subq
1549
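# __ecp_nistz256_mul_by_2q: internal helper.  Doubles (%r12,%r13,%r8,%r9)
# mod p and stores the result at %rdi.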
1550 .type   __ecp_nistz256_mul_by_2q,@function
1551 .align  32
1552 __ecp_nistz256_mul_by_2q:
1553         addq    %r12,%r12
1554         adcq    %r13,%r13
1555         movq    %r12,%rax
1556         adcq    %r8,%r8
1557         adcq    %r9,%r9
1558         movq    %r13,%rbp
1559         sbbq    %r11,%r11
1560
1561         subq    $-1,%r12
1562         movq    %r8,%rcx
1563         sbbq    %r14,%r13
1564         sbbq    $0,%r8
1565         movq    %r9,%r10
1566         sbbq    %r15,%r9
1567         testq   %r11,%r11
1568
1569         cmovzq  %rax,%r12
1570         cmovzq  %rbp,%r13
1571         movq    %r12,0(%rdi)
1572         cmovzq  %rcx,%r8
1573         movq    %r13,8(%rdi)
1574         cmovzq  %r10,%r9
1575         movq    %r8,16(%rdi)
1576         movq    %r9,24(%rdi)
1577
1578         .byte   0xf3,0xc3
1579 .size   __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
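# ecp_nistz256_point_double: res(%rdi) = 2*P for a point P(%rsi) in
# Jacobian coordinates (X, Y, Z, each in Montgomery form), using a
# 160-byte stack frame of temporaries.  Dispatches to the MULX/ADX
# flavour when available.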
1580 .globl  ecp_nistz256_point_double
1581 .type   ecp_nistz256_point_double,@function
1582 .align  32
1583 ecp_nistz256_point_double:
1584         movl    $0x80100,%ecx
1585         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1586         cmpl    $0x80100,%ecx
1587         je      .Lpoint_doublex
1588         pushq   %rbp
1589         pushq   %rbx
1590         pushq   %r12
1591         pushq   %r13
1592         pushq   %r14
1593         pushq   %r15
1594         subq    $160+8,%rsp
1595
1596 .Lpoint_double_shortcutq:
1597         movdqu  0(%rsi),%xmm0
1598         movq    %rsi,%rbx
1599         movdqu  16(%rsi),%xmm1
1600         movq    32+0(%rsi),%r12
1601         movq    32+8(%rsi),%r13
1602         movq    32+16(%rsi),%r8
1603         movq    32+24(%rsi),%r9
1604         movq    .Lpoly+8(%rip),%r14
1605         movq    .Lpoly+24(%rip),%r15
1606         movdqa  %xmm0,96(%rsp)
1607         movdqa  %xmm1,96+16(%rsp)
1608         leaq    32(%rdi),%r10
1609         leaq    64(%rdi),%r11
1610 .byte   102,72,15,110,199
1611 .byte   102,73,15,110,202
1612 .byte   102,73,15,110,211
1613
1614         leaq    0(%rsp),%rdi
1615         call    __ecp_nistz256_mul_by_2q
1616
1617         movq    64+0(%rsi),%rax
1618         movq    64+8(%rsi),%r14
1619         movq    64+16(%rsi),%r15
1620         movq    64+24(%rsi),%r8
1621         leaq    64-0(%rsi),%rsi
1622         leaq    64(%rsp),%rdi
1623         call    __ecp_nistz256_sqr_montq
1624
1625         movq    0+0(%rsp),%rax
1626         movq    8+0(%rsp),%r14
1627         leaq    0+0(%rsp),%rsi
1628         movq    16+0(%rsp),%r15
1629         movq    24+0(%rsp),%r8
1630         leaq    0(%rsp),%rdi
1631         call    __ecp_nistz256_sqr_montq
1632
1633         movq    32(%rbx),%rax
1634         movq    64+0(%rbx),%r9
1635         movq    64+8(%rbx),%r10
1636         movq    64+16(%rbx),%r11
1637         movq    64+24(%rbx),%r12
1638         leaq    64-0(%rbx),%rsi
1639         leaq    32(%rbx),%rbx
1640 .byte   102,72,15,126,215
1641         call    __ecp_nistz256_mul_montq
1642         call    __ecp_nistz256_mul_by_2q
1643
1644         movq    96+0(%rsp),%r12
1645         movq    96+8(%rsp),%r13
1646         leaq    64(%rsp),%rbx
1647         movq    96+16(%rsp),%r8
1648         movq    96+24(%rsp),%r9
1649         leaq    32(%rsp),%rdi
1650         call    __ecp_nistz256_add_toq
1651
1652         movq    96+0(%rsp),%r12
1653         movq    96+8(%rsp),%r13
1654         leaq    64(%rsp),%rbx
1655         movq    96+16(%rsp),%r8
1656         movq    96+24(%rsp),%r9
1657         leaq    64(%rsp),%rdi
1658         call    __ecp_nistz256_sub_fromq
1659
1660         movq    0+0(%rsp),%rax
1661         movq    8+0(%rsp),%r14
1662         leaq    0+0(%rsp),%rsi
1663         movq    16+0(%rsp),%r15
1664         movq    24+0(%rsp),%r8
1665 .byte   102,72,15,126,207
1666         call    __ecp_nistz256_sqr_montq
1667         xorq    %r9,%r9
1668         movq    %r12,%rax
1669         addq    $-1,%r12
1670         movq    %r13,%r10
1671         adcq    %rsi,%r13
1672         movq    %r14,%rcx
1673         adcq    $0,%r14
1674         movq    %r15,%r8
1675         adcq    %rbp,%r15
1676         adcq    $0,%r9
1677         xorq    %rsi,%rsi
1678         testq   $1,%rax
1679
1680         cmovzq  %rax,%r12
1681         cmovzq  %r10,%r13
1682         cmovzq  %rcx,%r14
1683         cmovzq  %r8,%r15
1684         cmovzq  %rsi,%r9
1685
1686         movq    %r13,%rax
1687         shrq    $1,%r12
1688         shlq    $63,%rax
1689         movq    %r14,%r10
1690         shrq    $1,%r13
1691         orq     %rax,%r12
1692         shlq    $63,%r10
1693         movq    %r15,%rcx
1694         shrq    $1,%r14
1695         orq     %r10,%r13
1696         shlq    $63,%rcx
1697         movq    %r12,0(%rdi)
1698         shrq    $1,%r15
1699         movq    %r13,8(%rdi)
1700         shlq    $63,%r9
1701         orq     %rcx,%r14
1702         orq     %r9,%r15
1703         movq    %r14,16(%rdi)
1704         movq    %r15,24(%rdi)
1705         movq    64(%rsp),%rax
1706         leaq    64(%rsp),%rbx
1707         movq    0+32(%rsp),%r9
1708         movq    8+32(%rsp),%r10
1709         leaq    0+32(%rsp),%rsi
1710         movq    16+32(%rsp),%r11
1711         movq    24+32(%rsp),%r12
1712         leaq    32(%rsp),%rdi
1713         call    __ecp_nistz256_mul_montq
1714
1715         leaq    128(%rsp),%rdi
1716         call    __ecp_nistz256_mul_by_2q
1717
1718         leaq    32(%rsp),%rbx
1719         leaq    32(%rsp),%rdi
1720         call    __ecp_nistz256_add_toq
1721
1722         movq    96(%rsp),%rax
1723         leaq    96(%rsp),%rbx
1724         movq    0+0(%rsp),%r9
1725         movq    8+0(%rsp),%r10
1726         leaq    0+0(%rsp),%rsi
1727         movq    16+0(%rsp),%r11
1728         movq    24+0(%rsp),%r12
1729         leaq    0(%rsp),%rdi
1730         call    __ecp_nistz256_mul_montq
1731
1732         leaq    128(%rsp),%rdi
1733         call    __ecp_nistz256_mul_by_2q
1734
1735         movq    0+32(%rsp),%rax
1736         movq    8+32(%rsp),%r14
1737         leaq    0+32(%rsp),%rsi
1738         movq    16+32(%rsp),%r15
1739         movq    24+32(%rsp),%r8
1740 .byte   102,72,15,126,199
1741         call    __ecp_nistz256_sqr_montq
1742
1743         leaq    128(%rsp),%rbx
1744         movq    %r14,%r8
1745         movq    %r15,%r9
1746         movq    %rsi,%r14
1747         movq    %rbp,%r15
1748         call    __ecp_nistz256_sub_fromq
1749
1750         movq    0+0(%rsp),%rax
1751         movq    0+8(%rsp),%rbp
1752         movq    0+16(%rsp),%rcx
1753         movq    0+24(%rsp),%r10
1754         leaq    0(%rsp),%rdi
1755         call    __ecp_nistz256_subq
1756
1757         movq    32(%rsp),%rax
1758         leaq    32(%rsp),%rbx
1759         movq    %r12,%r14
1760         xorl    %ecx,%ecx
1761         movq    %r12,0+0(%rsp)
1762         movq    %r13,%r10
1763         movq    %r13,0+8(%rsp)
1764         cmovzq  %r8,%r11
1765         movq    %r8,0+16(%rsp)
1766         leaq    0-0(%rsp),%rsi
1767         cmovzq  %r9,%r12
1768         movq    %r9,0+24(%rsp)
1769         movq    %r14,%r9
1770         leaq    0(%rsp),%rdi
1771         call    __ecp_nistz256_mul_montq
1772
1773 .byte   102,72,15,126,203
1774 .byte   102,72,15,126,207
1775         call    __ecp_nistz256_sub_fromq
1776
1777         addq    $160+8,%rsp
1778         popq    %r15
1779         popq    %r14
1780         popq    %r13
1781         popq    %r12
1782         popq    %rbx
1783         popq    %rbp
1784         .byte   0xf3,0xc3
1785 .size   ecp_nistz256_point_double,.-ecp_nistz256_point_double
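# ecp_nistz256_point_add: res(%rdi) = P(%rsi) + Q(%rdx) for Jacobian
# points in Montgomery form, on a 576-byte stack frame.  Equal inputs are
# redirected to the doubling path, and points at infinity are handled by
# the masked copies at the end of the routine.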
1786 .globl  ecp_nistz256_point_add
1787 .type   ecp_nistz256_point_add,@function
1788 .align  32
1789 ecp_nistz256_point_add:
1790         movl    $0x80100,%ecx
1791         andl    OPENSSL_ia32cap_P+8(%rip),%ecx
1792         cmpl    $0x80100,%ecx
1793         je      .Lpoint_addx
1794         pushq   %rbp
1795         pushq   %rbx
1796         pushq   %r12
1797         pushq   %r13
1798         pushq   %r14
1799         pushq   %r15
1800         subq    $576+8,%rsp
1801
1802         movdqu  0(%rsi),%xmm0
1803         movdqu  16(%rsi),%xmm1
1804         movdqu  32(%rsi),%xmm2
1805         movdqu  48(%rsi),%xmm3
1806         movdqu  64(%rsi),%xmm4
1807         movdqu  80(%rsi),%xmm5
1808         movq    %rsi,%rbx
1809         movq    %rdx,%rsi
1810         movdqa  %xmm0,384(%rsp)
1811         movdqa  %xmm1,384+16(%rsp)
1812         por     %xmm0,%xmm1
1813         movdqa  %xmm2,416(%rsp)
1814         movdqa  %xmm3,416+16(%rsp)
1815         por     %xmm2,%xmm3
1816         movdqa  %xmm4,448(%rsp)
1817         movdqa  %xmm5,448+16(%rsp)
1818         por     %xmm1,%xmm3
1819
1820         movdqu  0(%rsi),%xmm0
1821         pshufd  $0xb1,%xmm3,%xmm5
1822         movdqu  16(%rsi),%xmm1
1823         movdqu  32(%rsi),%xmm2
1824         por     %xmm3,%xmm5
1825         movdqu  48(%rsi),%xmm3
1826         movq    64+0(%rsi),%rax
1827         movq    64+8(%rsi),%r14
1828         movq    64+16(%rsi),%r15
1829         movq    64+24(%rsi),%r8
1830         movdqa  %xmm0,480(%rsp)
1831         pshufd  $0x1e,%xmm5,%xmm4
1832         movdqa  %xmm1,480+16(%rsp)
1833         por     %xmm0,%xmm1
1834 .byte   102,72,15,110,199
1835         movdqa  %xmm2,512(%rsp)
1836         movdqa  %xmm3,512+16(%rsp)
1837         por     %xmm2,%xmm3
1838         por     %xmm4,%xmm5
1839         pxor    %xmm4,%xmm4
1840         por     %xmm1,%xmm3
1841
1842         leaq    64-0(%rsi),%rsi
1843         movq    %rax,544+0(%rsp)
1844         movq    %r14,544+8(%rsp)
1845         movq    %r15,544+16(%rsp)
1846         movq    %r8,544+24(%rsp)
1847         leaq    96(%rsp),%rdi
1848         call    __ecp_nistz256_sqr_montq
1849
1850         pcmpeqd %xmm4,%xmm5
1851         pshufd  $0xb1,%xmm3,%xmm4
1852         por     %xmm3,%xmm4
1853         pshufd  $0,%xmm5,%xmm5
1854         pshufd  $0x1e,%xmm4,%xmm3
1855         por     %xmm3,%xmm4
1856         pxor    %xmm3,%xmm3
1857         pcmpeqd %xmm3,%xmm4
1858         pshufd  $0,%xmm4,%xmm4
1859         movq    64+0(%rbx),%rax
1860         movq    64+8(%rbx),%r14
1861         movq    64+16(%rbx),%r15
1862         movq    64+24(%rbx),%r8
1863 .byte   102,72,15,110,203
1864
1865         leaq    64-0(%rbx),%rsi
1866         leaq    32(%rsp),%rdi
1867         call    __ecp_nistz256_sqr_montq
1868
1869         movq    544(%rsp),%rax
1870         leaq    544(%rsp),%rbx
1871         movq    0+96(%rsp),%r9
1872         movq    8+96(%rsp),%r10
1873         leaq    0+96(%rsp),%rsi
1874         movq    16+96(%rsp),%r11
1875         movq    24+96(%rsp),%r12
1876         leaq    224(%rsp),%rdi
1877         call    __ecp_nistz256_mul_montq
1878
1879         movq    448(%rsp),%rax
1880         leaq    448(%rsp),%rbx
1881         movq    0+32(%rsp),%r9
1882         movq    8+32(%rsp),%r10
1883         leaq    0+32(%rsp),%rsi
1884         movq    16+32(%rsp),%r11
1885         movq    24+32(%rsp),%r12
1886         leaq    256(%rsp),%rdi
1887         call    __ecp_nistz256_mul_montq
1888
1889         movq    416(%rsp),%rax
1890         leaq    416(%rsp),%rbx
1891         movq    0+224(%rsp),%r9
1892         movq    8+224(%rsp),%r10
1893         leaq    0+224(%rsp),%rsi
1894         movq    16+224(%rsp),%r11
1895         movq    24+224(%rsp),%r12
1896         leaq    224(%rsp),%rdi
1897         call    __ecp_nistz256_mul_montq
1898
1899         movq    512(%rsp),%rax
1900         leaq    512(%rsp),%rbx
1901         movq    0+256(%rsp),%r9
1902         movq    8+256(%rsp),%r10
1903         leaq    0+256(%rsp),%rsi
1904         movq    16+256(%rsp),%r11
1905         movq    24+256(%rsp),%r12
1906         leaq    256(%rsp),%rdi
1907         call    __ecp_nistz256_mul_montq
1908
1909         leaq    224(%rsp),%rbx
1910         leaq    64(%rsp),%rdi
1911         call    __ecp_nistz256_sub_fromq
1912
1913         orq     %r13,%r12
1914         movdqa  %xmm4,%xmm2
1915         orq     %r8,%r12
1916         orq     %r9,%r12
1917         por     %xmm5,%xmm2
1918 .byte   102,73,15,110,220
1919
1920         movq    384(%rsp),%rax
1921         leaq    384(%rsp),%rbx
1922         movq    0+96(%rsp),%r9
1923         movq    8+96(%rsp),%r10
1924         leaq    0+96(%rsp),%rsi
1925         movq    16+96(%rsp),%r11
1926         movq    24+96(%rsp),%r12
1927         leaq    160(%rsp),%rdi
1928         call    __ecp_nistz256_mul_montq
1929
1930         movq    480(%rsp),%rax
1931         leaq    480(%rsp),%rbx
1932         movq    0+32(%rsp),%r9
1933         movq    8+32(%rsp),%r10
1934         leaq    0+32(%rsp),%rsi
1935         movq    16+32(%rsp),%r11
1936         movq    24+32(%rsp),%r12
1937         leaq    192(%rsp),%rdi
1938         call    __ecp_nistz256_mul_montq
1939
1940         leaq    160(%rsp),%rbx
1941         leaq    0(%rsp),%rdi
1942         call    __ecp_nistz256_sub_fromq
1943
        orq     %r13,%r12
        orq     %r8,%r12
        orq     %r9,%r12

.byte   0x3e
        jnz     .Ladd_proceedq
.byte   102,73,15,126,208
.byte   102,73,15,126,217
        testq   %r8,%r8
        jnz     .Ladd_proceedq
        testq   %r9,%r9
        jz      .Ladd_doubleq

.byte   102,72,15,126,199
        pxor    %xmm0,%xmm0
        movdqu  %xmm0,0(%rdi)
        movdqu  %xmm0,16(%rdi)
        movdqu  %xmm0,32(%rdi)
        movdqu  %xmm0,48(%rdi)
        movdqu  %xmm0,64(%rdi)
        movdqu  %xmm0,80(%rdi)
        jmp     .Ladd_doneq

.align  32
.Ladd_doubleq:
.byte   102,72,15,126,206
.byte   102,72,15,126,199
        addq    $416,%rsp
        jmp     .Lpoint_double_shortcutq

.align  32
.Ladd_proceedq:
        movq    0+64(%rsp),%rax
        movq    8+64(%rsp),%r14
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r15
        movq    24+64(%rsp),%r8
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq

        movq    448(%rsp),%rax
        leaq    448(%rsp),%rbx
        movq    0+0(%rsp),%r9
        movq    8+0(%rsp),%r10
        leaq    0+0(%rsp),%rsi
        movq    16+0(%rsp),%r11
        movq    24+0(%rsp),%r12
        leaq    352(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    0+0(%rsp),%rax
        movq    8+0(%rsp),%r14
        leaq    0+0(%rsp),%rsi
        movq    16+0(%rsp),%r15
        movq    24+0(%rsp),%r8
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq

        movq    544(%rsp),%rax
        leaq    544(%rsp),%rbx
        movq    0+352(%rsp),%r9
        movq    8+352(%rsp),%r10
        leaq    0+352(%rsp),%rsi
        movq    16+352(%rsp),%r11
        movq    24+352(%rsp),%r12
        leaq    352(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    0(%rsp),%rax
        leaq    0(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    160(%rsp),%rax
        leaq    160(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_mul_montq




        addq    %r12,%r12
        leaq    96(%rsp),%rsi
        adcq    %r13,%r13
        movq    %r12,%rax
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        sbbq    %r11,%r11

        subq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9
        testq   %r11,%r11

        cmovzq  %rax,%r12
        movq    0(%rsi),%rax
        cmovzq  %rbp,%r13
        movq    8(%rsi),%rbp
        cmovzq  %rcx,%r8
        movq    16(%rsi),%rcx
        cmovzq  %r10,%r9
        movq    24(%rsi),%r10

        call    __ecp_nistz256_subq

        leaq    128(%rsp),%rbx
        leaq    288(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

        movq    192+0(%rsp),%rax
        movq    192+8(%rsp),%rbp
        movq    192+16(%rsp),%rcx
        movq    192+24(%rsp),%r10
        leaq    320(%rsp),%rdi

        call    __ecp_nistz256_subq

        movq    %r12,0(%rdi)
        movq    %r13,8(%rdi)
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)
        movq    128(%rsp),%rax
        leaq    128(%rsp),%rbx
        movq    0+224(%rsp),%r9
        movq    8+224(%rsp),%r10
        leaq    0+224(%rsp),%rsi
        movq    16+224(%rsp),%r11
        movq    24+224(%rsp),%r12
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    320(%rsp),%rax
        leaq    320(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    320(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        leaq    256(%rsp),%rbx
        leaq    320(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

.byte   102,72,15,126,199

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   352(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   352+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    544(%rsp),%xmm2
        pand    544+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    448(%rsp),%xmm2
        pand    448+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,64(%rdi)
        movdqu  %xmm3,80(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   288(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   288+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    480(%rsp),%xmm2
        pand    480+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    384(%rsp),%xmm2
        pand    384+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,0(%rdi)
        movdqu  %xmm3,16(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   320(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   320+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    512(%rsp),%xmm2
        pand    512+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    416(%rsp),%xmm2
        pand    416+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,32(%rdi)
        movdqu  %xmm3,48(%rdi)

.Ladd_doneq:
        addq    $576+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
.size   ecp_nistz256_point_add,.-ecp_nistz256_point_add
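# ecp_nistz256_point_add_affine(res, a, b): mixed addition of a Jacobian-
# coordinate point (a) and an affine point (b) on the NIST P-256 curve, all
# field elements in the Montgomery domain.  The OPENSSL_ia32cap_P mask
# 0x80100 (BMI2 + ADX feature bits) below dispatches to the MULX/ADX variant.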
.globl  ecp_nistz256_point_add_affine
.type   ecp_nistz256_point_add_affine,@function
.align  32
ecp_nistz256_point_add_affine:
        movl    $0x80100,%ecx
        andl    OPENSSL_ia32cap_P+8(%rip),%ecx
        cmpl    $0x80100,%ecx
        je      .Lpoint_add_affinex
        pushq   %rbp
        pushq   %rbx
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $480+8,%rsp

        movdqu  0(%rsi),%xmm0
        movq    %rdx,%rbx
        movdqu  16(%rsi),%xmm1
        movdqu  32(%rsi),%xmm2
        movdqu  48(%rsi),%xmm3
        movdqu  64(%rsi),%xmm4
        movdqu  80(%rsi),%xmm5
        movq    64+0(%rsi),%rax
        movq    64+8(%rsi),%r14
        movq    64+16(%rsi),%r15
        movq    64+24(%rsi),%r8
        movdqa  %xmm0,320(%rsp)
        movdqa  %xmm1,320+16(%rsp)
        por     %xmm0,%xmm1
        movdqa  %xmm2,352(%rsp)
        movdqa  %xmm3,352+16(%rsp)
        por     %xmm2,%xmm3
        movdqa  %xmm4,384(%rsp)
        movdqa  %xmm5,384+16(%rsp)
        por     %xmm1,%xmm3

        movdqu  0(%rbx),%xmm0
        pshufd  $0xb1,%xmm3,%xmm5
        movdqu  16(%rbx),%xmm1
        movdqu  32(%rbx),%xmm2
        por     %xmm3,%xmm5
        movdqu  48(%rbx),%xmm3
        movdqa  %xmm0,416(%rsp)
        pshufd  $0x1e,%xmm5,%xmm4
        movdqa  %xmm1,416+16(%rsp)
        por     %xmm0,%xmm1
.byte   102,72,15,110,199
        movdqa  %xmm2,448(%rsp)
        movdqa  %xmm3,448+16(%rsp)
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        pxor    %xmm4,%xmm4
        por     %xmm1,%xmm3

        leaq    64-0(%rsi),%rsi
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq

        pcmpeqd %xmm4,%xmm5
        pshufd  $0xb1,%xmm3,%xmm4
        movq    0(%rbx),%rax

        movq    %r12,%r9
        por     %xmm3,%xmm4
        pshufd  $0,%xmm5,%xmm5
        pshufd  $0x1e,%xmm4,%xmm3
        movq    %r13,%r10
        por     %xmm3,%xmm4
        pxor    %xmm3,%xmm3
        movq    %r14,%r11
        pcmpeqd %xmm3,%xmm4
        pshufd  $0,%xmm4,%xmm4

        leaq    32-0(%rsp),%rsi
        movq    %r15,%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        leaq    320(%rsp),%rbx
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

        movq    384(%rsp),%rax
        leaq    384(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    384(%rsp),%rax
        leaq    384(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    288(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    448(%rsp),%rax
        leaq    448(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    0+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        leaq    352(%rsp),%rbx
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

        movq    0+64(%rsp),%rax
        movq    8+64(%rsp),%r14
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r15
        movq    24+64(%rsp),%r8
        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq

        movq    0+96(%rsp),%rax
        movq    8+96(%rsp),%r14
        leaq    0+96(%rsp),%rsi
        movq    16+96(%rsp),%r15
        movq    24+96(%rsp),%r8
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_sqr_montq

        movq    128(%rsp),%rax
        leaq    128(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    160(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    320(%rsp),%rax
        leaq    320(%rsp),%rbx
        movq    0+128(%rsp),%r9
        movq    8+128(%rsp),%r10
        leaq    0+128(%rsp),%rsi
        movq    16+128(%rsp),%r11
        movq    24+128(%rsp),%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montq




        addq    %r12,%r12
        leaq    192(%rsp),%rsi
        adcq    %r13,%r13
        movq    %r12,%rax
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        sbbq    %r11,%r11

        subq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9
        testq   %r11,%r11

        cmovzq  %rax,%r12
        movq    0(%rsi),%rax
        cmovzq  %rbp,%r13
        movq    8(%rsi),%rbp
        cmovzq  %rcx,%r8
        movq    16(%rsi),%rcx
        cmovzq  %r10,%r9
        movq    24(%rsi),%r10

        call    __ecp_nistz256_subq

        leaq    160(%rsp),%rbx
        leaq    224(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

        movq    0+0(%rsp),%rax
        movq    0+8(%rsp),%rbp
        movq    0+16(%rsp),%rcx
        movq    0+24(%rsp),%r10
        leaq    64(%rsp),%rdi

        call    __ecp_nistz256_subq

        movq    %r12,0(%rdi)
        movq    %r13,8(%rdi)
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)
        movq    352(%rsp),%rax
        leaq    352(%rsp),%rbx
        movq    0+160(%rsp),%r9
        movq    8+160(%rsp),%r10
        leaq    0+160(%rsp),%rsi
        movq    16+160(%rsp),%r11
        movq    24+160(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        movq    96(%rsp),%rax
        leaq    96(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    0+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_mul_montq

        leaq    32(%rsp),%rbx
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_sub_fromq

.byte   102,72,15,126,199

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   288(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   288+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    .LONE_mont(%rip),%xmm2
        pand    .LONE_mont+16(%rip),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    384(%rsp),%xmm2
        pand    384+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,64(%rdi)
        movdqu  %xmm3,80(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   224(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   224+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    416(%rsp),%xmm2
        pand    416+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    320(%rsp),%xmm2
        pand    320+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,0(%rdi)
        movdqu  %xmm3,16(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   256(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   256+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    448(%rsp),%xmm2
        pand    448+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    352(%rsp),%xmm2
        pand    352+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,32(%rdi)
        movdqu  %xmm3,48(%rdi)

        addq    $480+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
.size   ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
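# Helpers for the MULX/ADX code path.  The __ecp_nistz256_*x routines below
# perform 256-bit addition, subtraction and doubling with the result reduced
# modulo the P-256 prime, passing operands in registers like their *q
# counterparts earlier in this file.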
.type   __ecp_nistz256_add_tox,@function
.align  32
__ecp_nistz256_add_tox:
        xorq    %r11,%r11
        adcq    0(%rbx),%r12
        adcq    8(%rbx),%r13
        movq    %r12,%rax
        adcq    16(%rbx),%r8
        adcq    24(%rbx),%r9
        movq    %r13,%rbp
        adcq    $0,%r11

        xorq    %r10,%r10
        sbbq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9

        btq     $0,%r11
        cmovncq %rax,%r12
        cmovncq %rbp,%r13
        movq    %r12,0(%rdi)
        cmovncq %rcx,%r8
        movq    %r13,8(%rdi)
        cmovncq %r10,%r9
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)

        .byte   0xf3,0xc3
.size   __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

.type   __ecp_nistz256_sub_fromx,@function
.align  32
__ecp_nistz256_sub_fromx:
        xorq    %r11,%r11
        sbbq    0(%rbx),%r12
        sbbq    8(%rbx),%r13
        movq    %r12,%rax
        sbbq    16(%rbx),%r8
        sbbq    24(%rbx),%r9
        movq    %r13,%rbp
        sbbq    $0,%r11

        xorq    %r10,%r10
        adcq    $-1,%r12
        movq    %r8,%rcx
        adcq    %r14,%r13
        adcq    $0,%r8
        movq    %r9,%r10
        adcq    %r15,%r9

        btq     $0,%r11
        cmovncq %rax,%r12
        cmovncq %rbp,%r13
        movq    %r12,0(%rdi)
        cmovncq %rcx,%r8
        movq    %r13,8(%rdi)
        cmovncq %r10,%r9
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)

        .byte   0xf3,0xc3
.size   __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

.type   __ecp_nistz256_subx,@function
.align  32
__ecp_nistz256_subx:
        xorq    %r11,%r11
        sbbq    %r12,%rax
        sbbq    %r13,%rbp
        movq    %rax,%r12
        sbbq    %r8,%rcx
        sbbq    %r9,%r10
        movq    %rbp,%r13
        sbbq    $0,%r11

        xorq    %r9,%r9
        adcq    $-1,%rax
        movq    %rcx,%r8
        adcq    %r14,%rbp
        adcq    $0,%rcx
        movq    %r10,%r9
        adcq    %r15,%r10

        btq     $0,%r11
        cmovcq  %rax,%r12
        cmovcq  %rbp,%r13
        cmovcq  %rcx,%r8
        cmovcq  %r10,%r9

        .byte   0xf3,0xc3
.size   __ecp_nistz256_subx,.-__ecp_nistz256_subx

.type   __ecp_nistz256_mul_by_2x,@function
.align  32
__ecp_nistz256_mul_by_2x:
        xorq    %r11,%r11
        adcq    %r12,%r12
        adcq    %r13,%r13
        movq    %r12,%rax
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        adcq    $0,%r11

        xorq    %r10,%r10
        sbbq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9

        btq     $0,%r11
        cmovncq %rax,%r12
        cmovncq %rbp,%r13
        movq    %r12,0(%rdi)
        cmovncq %rcx,%r8
        movq    %r13,8(%rdi)
        cmovncq %r10,%r9
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)

        .byte   0xf3,0xc3
.size   __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
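# ecp_nistz256_point_doublex: point doubling on P-256 using the MULX/ADX
# multiplication primitives (%rdi = result point, %rsi = input point).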
.type   ecp_nistz256_point_doublex,@function
.align  32
ecp_nistz256_point_doublex:
.Lpoint_doublex:
        pushq   %rbp
        pushq   %rbx
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $160+8,%rsp

.Lpoint_double_shortcutx:
        movdqu  0(%rsi),%xmm0
        movq    %rsi,%rbx
        movdqu  16(%rsi),%xmm1
        movq    32+0(%rsi),%r12
        movq    32+8(%rsi),%r13
        movq    32+16(%rsi),%r8
        movq    32+24(%rsi),%r9
        movq    .Lpoly+8(%rip),%r14
        movq    .Lpoly+24(%rip),%r15
        movdqa  %xmm0,96(%rsp)
        movdqa  %xmm1,96+16(%rsp)
        leaq    32(%rdi),%r10
        leaq    64(%rdi),%r11
.byte   102,72,15,110,199
.byte   102,73,15,110,202
.byte   102,73,15,110,211

        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_by_2x

        movq    64+0(%rsi),%rdx
        movq    64+8(%rsi),%r14
        movq    64+16(%rsi),%r15
        movq    64+24(%rsi),%r8
        leaq    64-128(%rsi),%rsi
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    0+0(%rsp),%rdx
        movq    8+0(%rsp),%r14
        leaq    -128+0(%rsp),%rsi
        movq    16+0(%rsp),%r15
        movq    24+0(%rsp),%r8
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    32(%rbx),%rdx
        movq    64+0(%rbx),%r9
        movq    64+8(%rbx),%r10
        movq    64+16(%rbx),%r11
        movq    64+24(%rbx),%r12
        leaq    64-128(%rbx),%rsi
        leaq    32(%rbx),%rbx
.byte   102,72,15,126,215
        call    __ecp_nistz256_mul_montx
        call    __ecp_nistz256_mul_by_2x

        movq    96+0(%rsp),%r12
        movq    96+8(%rsp),%r13
        leaq    64(%rsp),%rbx
        movq    96+16(%rsp),%r8
        movq    96+24(%rsp),%r9
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_add_tox

        movq    96+0(%rsp),%r12
        movq    96+8(%rsp),%r13
        leaq    64(%rsp),%rbx
        movq    96+16(%rsp),%r8
        movq    96+24(%rsp),%r9
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        movq    0+0(%rsp),%rdx
        movq    8+0(%rsp),%r14
        leaq    -128+0(%rsp),%rsi
        movq    16+0(%rsp),%r15
        movq    24+0(%rsp),%r8
.byte   102,72,15,126,207
        call    __ecp_nistz256_sqr_montx
        xorq    %r9,%r9
        movq    %r12,%rax
        addq    $-1,%r12
        movq    %r13,%r10
        adcq    %rsi,%r13
        movq    %r14,%rcx
        adcq    $0,%r14
        movq    %r15,%r8
        adcq    %rbp,%r15
        adcq    $0,%r9
        xorq    %rsi,%rsi
        testq   $1,%rax

        cmovzq  %rax,%r12
        cmovzq  %r10,%r13
        cmovzq  %rcx,%r14
        cmovzq  %r8,%r15
        cmovzq  %rsi,%r9

        movq    %r13,%rax
        shrq    $1,%r12
        shlq    $63,%rax
        movq    %r14,%r10
        shrq    $1,%r13
        orq     %rax,%r12
        shlq    $63,%r10
        movq    %r15,%rcx
        shrq    $1,%r14
        orq     %r10,%r13
        shlq    $63,%rcx
        movq    %r12,0(%rdi)
        shrq    $1,%r15
        movq    %r13,8(%rdi)
        shlq    $63,%r9
        orq     %rcx,%r14
        orq     %r9,%r15
        movq    %r14,16(%rdi)
        movq    %r15,24(%rdi)
        movq    64(%rsp),%rdx
        leaq    64(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_mul_by_2x

        leaq    32(%rsp),%rbx
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_add_tox

        movq    96(%rsp),%rdx
        leaq    96(%rsp),%rbx
        movq    0+0(%rsp),%r9
        movq    8+0(%rsp),%r10
        leaq    -128+0(%rsp),%rsi
        movq    16+0(%rsp),%r11
        movq    24+0(%rsp),%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_mul_by_2x

        movq    0+32(%rsp),%rdx
        movq    8+32(%rsp),%r14
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r15
        movq    24+32(%rsp),%r8
.byte   102,72,15,126,199
        call    __ecp_nistz256_sqr_montx

        leaq    128(%rsp),%rbx
        movq    %r14,%r8
        movq    %r15,%r9
        movq    %rsi,%r14
        movq    %rbp,%r15
        call    __ecp_nistz256_sub_fromx

        movq    0+0(%rsp),%rax
        movq    0+8(%rsp),%rbp
        movq    0+16(%rsp),%rcx
        movq    0+24(%rsp),%r10
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_subx

        movq    32(%rsp),%rdx
        leaq    32(%rsp),%rbx
        movq    %r12,%r14
        xorl    %ecx,%ecx
        movq    %r12,0+0(%rsp)
        movq    %r13,%r10
        movq    %r13,0+8(%rsp)
        cmovzq  %r8,%r11
        movq    %r8,0+16(%rsp)
        leaq    0-128(%rsp),%rsi
        cmovzq  %r9,%r12
        movq    %r9,0+24(%rsp)
        movq    %r14,%r9
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

.byte   102,72,15,126,203
.byte   102,72,15,126,207
        call    __ecp_nistz256_sub_fromx

        addq    $160+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
.size   ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
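# ecp_nistz256_point_addx: full Jacobian point addition on P-256 using the
# MULX/ADX primitives (%rdi = result, %rsi and %rdx = input points).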
.type   ecp_nistz256_point_addx,@function
.align  32
ecp_nistz256_point_addx:
.Lpoint_addx:
        pushq   %rbp
        pushq   %rbx
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $576+8,%rsp

        movdqu  0(%rsi),%xmm0
        movdqu  16(%rsi),%xmm1
        movdqu  32(%rsi),%xmm2
        movdqu  48(%rsi),%xmm3
        movdqu  64(%rsi),%xmm4
        movdqu  80(%rsi),%xmm5
        movq    %rsi,%rbx
        movq    %rdx,%rsi
        movdqa  %xmm0,384(%rsp)
        movdqa  %xmm1,384+16(%rsp)
        por     %xmm0,%xmm1
        movdqa  %xmm2,416(%rsp)
        movdqa  %xmm3,416+16(%rsp)
        por     %xmm2,%xmm3
        movdqa  %xmm4,448(%rsp)
        movdqa  %xmm5,448+16(%rsp)
        por     %xmm1,%xmm3

        movdqu  0(%rsi),%xmm0
        pshufd  $0xb1,%xmm3,%xmm5
        movdqu  16(%rsi),%xmm1
        movdqu  32(%rsi),%xmm2
        por     %xmm3,%xmm5
        movdqu  48(%rsi),%xmm3
        movq    64+0(%rsi),%rdx
        movq    64+8(%rsi),%r14
        movq    64+16(%rsi),%r15
        movq    64+24(%rsi),%r8
        movdqa  %xmm0,480(%rsp)
        pshufd  $0x1e,%xmm5,%xmm4
        movdqa  %xmm1,480+16(%rsp)
        por     %xmm0,%xmm1
.byte   102,72,15,110,199
        movdqa  %xmm2,512(%rsp)
        movdqa  %xmm3,512+16(%rsp)
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        pxor    %xmm4,%xmm4
        por     %xmm1,%xmm3

        leaq    64-128(%rsi),%rsi
        movq    %rdx,544+0(%rsp)
        movq    %r14,544+8(%rsp)
        movq    %r15,544+16(%rsp)
        movq    %r8,544+24(%rsp)
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        pcmpeqd %xmm4,%xmm5
        pshufd  $0xb1,%xmm3,%xmm4
        por     %xmm3,%xmm4
        pshufd  $0,%xmm5,%xmm5
        pshufd  $0x1e,%xmm4,%xmm3
        por     %xmm3,%xmm4
        pxor    %xmm3,%xmm3
        pcmpeqd %xmm3,%xmm4
        pshufd  $0,%xmm4,%xmm4
        movq    64+0(%rbx),%rdx
        movq    64+8(%rbx),%r14
        movq    64+16(%rbx),%r15
        movq    64+24(%rbx),%r8
.byte   102,72,15,110,203

        leaq    64-128(%rbx),%rsi
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    544(%rsp),%rdx
        leaq    544(%rsp),%rbx
        movq    0+96(%rsp),%r9
        movq    8+96(%rsp),%r10
        leaq    -128+96(%rsp),%rsi
        movq    16+96(%rsp),%r11
        movq    24+96(%rsp),%r12
        leaq    224(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    448(%rsp),%rdx
        leaq    448(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    416(%rsp),%rdx
        leaq    416(%rsp),%rbx
        movq    0+224(%rsp),%r9
        movq    8+224(%rsp),%r10
        leaq    -128+224(%rsp),%rsi
        movq    16+224(%rsp),%r11
        movq    24+224(%rsp),%r12
        leaq    224(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    512(%rsp),%rdx
        leaq    512(%rsp),%rbx
        movq    0+256(%rsp),%r9
        movq    8+256(%rsp),%r10
        leaq    -128+256(%rsp),%rsi
        movq    16+256(%rsp),%r11
        movq    24+256(%rsp),%r12
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    224(%rsp),%rbx
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        orq     %r13,%r12
        movdqa  %xmm4,%xmm2
        orq     %r8,%r12
        orq     %r9,%r12
        por     %xmm5,%xmm2
.byte   102,73,15,110,220

        movq    384(%rsp),%rdx
        leaq    384(%rsp),%rbx
        movq    0+96(%rsp),%r9
        movq    8+96(%rsp),%r10
        leaq    -128+96(%rsp),%rsi
        movq    16+96(%rsp),%r11
        movq    24+96(%rsp),%r12
        leaq    160(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    480(%rsp),%rdx
        leaq    480(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    160(%rsp),%rbx
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        orq     %r13,%r12
        orq     %r8,%r12
        orq     %r9,%r12

.byte   0x3e
        jnz     .Ladd_proceedx
.byte   102,73,15,126,208
.byte   102,73,15,126,217
        testq   %r8,%r8
        jnz     .Ladd_proceedx
        testq   %r9,%r9
        jz      .Ladd_doublex

.byte   102,72,15,126,199
        pxor    %xmm0,%xmm0
        movdqu  %xmm0,0(%rdi)
        movdqu  %xmm0,16(%rdi)
        movdqu  %xmm0,32(%rdi)
        movdqu  %xmm0,48(%rdi)
        movdqu  %xmm0,64(%rdi)
        movdqu  %xmm0,80(%rdi)
        jmp     .Ladd_donex

.align  32
.Ladd_doublex:
.byte   102,72,15,126,206
.byte   102,72,15,126,199
        addq    $416,%rsp
        jmp     .Lpoint_double_shortcutx

.align  32
.Ladd_proceedx:
        movq    0+64(%rsp),%rdx
        movq    8+64(%rsp),%r14
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r15
        movq    24+64(%rsp),%r8
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    448(%rsp),%rdx
        leaq    448(%rsp),%rbx
        movq    0+0(%rsp),%r9
        movq    8+0(%rsp),%r10
        leaq    -128+0(%rsp),%rsi
        movq    16+0(%rsp),%r11
        movq    24+0(%rsp),%r12
        leaq    352(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    0+0(%rsp),%rdx
        movq    8+0(%rsp),%r14
        leaq    -128+0(%rsp),%rsi
        movq    16+0(%rsp),%r15
        movq    24+0(%rsp),%r8
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    544(%rsp),%rdx
        leaq    544(%rsp),%rbx
        movq    0+352(%rsp),%r9
        movq    8+352(%rsp),%r10
        leaq    -128+352(%rsp),%rsi
        movq    16+352(%rsp),%r11
        movq    24+352(%rsp),%r12
        leaq    352(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    0(%rsp),%rdx
        leaq    0(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    160(%rsp),%rdx
        leaq    160(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_mul_montx




        addq    %r12,%r12
        leaq    96(%rsp),%rsi
        adcq    %r13,%r13
        movq    %r12,%rax
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        sbbq    %r11,%r11

        subq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9
        testq   %r11,%r11

        cmovzq  %rax,%r12
        movq    0(%rsi),%rax
        cmovzq  %rbp,%r13
        movq    8(%rsi),%rbp
        cmovzq  %rcx,%r8
        movq    16(%rsi),%rcx
        cmovzq  %r10,%r9
        movq    24(%rsi),%r10

        call    __ecp_nistz256_subx

        leaq    128(%rsp),%rbx
        leaq    288(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        movq    192+0(%rsp),%rax
        movq    192+8(%rsp),%rbp
        movq    192+16(%rsp),%rcx
        movq    192+24(%rsp),%r10
        leaq    320(%rsp),%rdi

        call    __ecp_nistz256_subx

        movq    %r12,0(%rdi)
        movq    %r13,8(%rdi)
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)
        movq    128(%rsp),%rdx
        leaq    128(%rsp),%rbx
        movq    0+224(%rsp),%r9
        movq    8+224(%rsp),%r10
        leaq    -128+224(%rsp),%rsi
        movq    16+224(%rsp),%r11
        movq    24+224(%rsp),%r12
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    320(%rsp),%rdx
        leaq    320(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    320(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    256(%rsp),%rbx
        leaq    320(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

.byte   102,72,15,126,199

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   352(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   352+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    544(%rsp),%xmm2
        pand    544+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    448(%rsp),%xmm2
        pand    448+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,64(%rdi)
        movdqu  %xmm3,80(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   288(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   288+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    480(%rsp),%xmm2
        pand    480+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    384(%rsp),%xmm2
        pand    384+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,0(%rdi)
        movdqu  %xmm3,16(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   320(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   320+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    512(%rsp),%xmm2
        pand    512+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    416(%rsp),%xmm2
        pand    416+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,32(%rdi)
        movdqu  %xmm3,48(%rdi)

.Ladd_donex:
        addq    $576+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
.size   ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
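# ecp_nistz256_point_add_affinex: mixed addition (Jacobian + affine) on P-256
# using the MULX/ADX primitives (%rdi = result, %rsi = Jacobian point,
# %rdx = affine point).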
.type   ecp_nistz256_point_add_affinex,@function
.align  32
ecp_nistz256_point_add_affinex:
.Lpoint_add_affinex:
        pushq   %rbp
        pushq   %rbx
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $480+8,%rsp

        movdqu  0(%rsi),%xmm0
        movq    %rdx,%rbx
        movdqu  16(%rsi),%xmm1
        movdqu  32(%rsi),%xmm2
        movdqu  48(%rsi),%xmm3
        movdqu  64(%rsi),%xmm4
        movdqu  80(%rsi),%xmm5
        movq    64+0(%rsi),%rdx
        movq    64+8(%rsi),%r14
        movq    64+16(%rsi),%r15
        movq    64+24(%rsi),%r8
        movdqa  %xmm0,320(%rsp)
        movdqa  %xmm1,320+16(%rsp)
        por     %xmm0,%xmm1
        movdqa  %xmm2,352(%rsp)
        movdqa  %xmm3,352+16(%rsp)
        por     %xmm2,%xmm3
        movdqa  %xmm4,384(%rsp)
        movdqa  %xmm5,384+16(%rsp)
        por     %xmm1,%xmm3

        movdqu  0(%rbx),%xmm0
        pshufd  $0xb1,%xmm3,%xmm5
        movdqu  16(%rbx),%xmm1
        movdqu  32(%rbx),%xmm2
        por     %xmm3,%xmm5
        movdqu  48(%rbx),%xmm3
        movdqa  %xmm0,416(%rsp)
        pshufd  $0x1e,%xmm5,%xmm4
        movdqa  %xmm1,416+16(%rsp)
        por     %xmm0,%xmm1
.byte   102,72,15,110,199
        movdqa  %xmm2,448(%rsp)
        movdqa  %xmm3,448+16(%rsp)
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        pxor    %xmm4,%xmm4
        por     %xmm1,%xmm3

        leaq    64-128(%rsi),%rsi
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        pcmpeqd %xmm4,%xmm5
        pshufd  $0xb1,%xmm3,%xmm4
        movq    0(%rbx),%rdx

        movq    %r12,%r9
        por     %xmm3,%xmm4
        pshufd  $0,%xmm5,%xmm5
        pshufd  $0x1e,%xmm4,%xmm3
        movq    %r13,%r10
        por     %xmm3,%xmm4
        pxor    %xmm3,%xmm3
        movq    %r14,%r11
        pcmpeqd %xmm3,%xmm4
        pshufd  $0,%xmm4,%xmm4

        leaq    32-128(%rsp),%rsi
        movq    %r15,%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    320(%rsp),%rbx
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        movq    384(%rsp),%rdx
        leaq    384(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    384(%rsp),%rdx
        leaq    384(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    288(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    448(%rsp),%rdx
        leaq    448(%rsp),%rbx
        movq    0+32(%rsp),%r9
        movq    8+32(%rsp),%r10
        leaq    -128+32(%rsp),%rsi
        movq    16+32(%rsp),%r11
        movq    24+32(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    352(%rsp),%rbx
        leaq    96(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        movq    0+64(%rsp),%rdx
        movq    8+64(%rsp),%r14
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r15
        movq    24+64(%rsp),%r8
        leaq    128(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    0+96(%rsp),%rdx
        movq    8+96(%rsp),%r14
        leaq    -128+96(%rsp),%rsi
        movq    16+96(%rsp),%r15
        movq    24+96(%rsp),%r8
        leaq    192(%rsp),%rdi
        call    __ecp_nistz256_sqr_montx

        movq    128(%rsp),%rdx
        leaq    128(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    160(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    320(%rsp),%rdx
        leaq    320(%rsp),%rbx
        movq    0+128(%rsp),%r9
        movq    8+128(%rsp),%r10
        leaq    -128+128(%rsp),%rsi
        movq    16+128(%rsp),%r11
        movq    24+128(%rsp),%r12
        leaq    0(%rsp),%rdi
        call    __ecp_nistz256_mul_montx




        addq    %r12,%r12
        leaq    192(%rsp),%rsi
        adcq    %r13,%r13
        movq    %r12,%rax
        adcq    %r8,%r8
        adcq    %r9,%r9
        movq    %r13,%rbp
        sbbq    %r11,%r11

        subq    $-1,%r12
        movq    %r8,%rcx
        sbbq    %r14,%r13
        sbbq    $0,%r8
        movq    %r9,%r10
        sbbq    %r15,%r9
        testq   %r11,%r11

        cmovzq  %rax,%r12
        movq    0(%rsi),%rax
        cmovzq  %rbp,%r13
        movq    8(%rsi),%rbp
        cmovzq  %rcx,%r8
        movq    16(%rsi),%rcx
        cmovzq  %r10,%r9
        movq    24(%rsi),%r10

        call    __ecp_nistz256_subx

        leaq    160(%rsp),%rbx
        leaq    224(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

        movq    0+0(%rsp),%rax
        movq    0+8(%rsp),%rbp
        movq    0+16(%rsp),%rcx
        movq    0+24(%rsp),%r10
        leaq    64(%rsp),%rdi

        call    __ecp_nistz256_subx

        movq    %r12,0(%rdi)
        movq    %r13,8(%rdi)
        movq    %r8,16(%rdi)
        movq    %r9,24(%rdi)
        movq    352(%rsp),%rdx
        leaq    352(%rsp),%rbx
        movq    0+160(%rsp),%r9
        movq    8+160(%rsp),%r10
        leaq    -128+160(%rsp),%rsi
        movq    16+160(%rsp),%r11
        movq    24+160(%rsp),%r12
        leaq    32(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        movq    96(%rsp),%rdx
        leaq    96(%rsp),%rbx
        movq    0+64(%rsp),%r9
        movq    8+64(%rsp),%r10
        leaq    -128+64(%rsp),%rsi
        movq    16+64(%rsp),%r11
        movq    24+64(%rsp),%r12
        leaq    64(%rsp),%rdi
        call    __ecp_nistz256_mul_montx

        leaq    32(%rsp),%rbx
        leaq    256(%rsp),%rdi
        call    __ecp_nistz256_sub_fromx

.byte   102,72,15,126,199

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   288(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   288+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    .LONE_mont(%rip),%xmm2
        pand    .LONE_mont+16(%rip),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    384(%rsp),%xmm2
        pand    384+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,64(%rdi)
        movdqu  %xmm3,80(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   224(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   224+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    416(%rsp),%xmm2
        pand    416+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    320(%rsp),%xmm2
        pand    320+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,0(%rdi)
        movdqu  %xmm3,16(%rdi)

        movdqa  %xmm5,%xmm0
        movdqa  %xmm5,%xmm1
        pandn   256(%rsp),%xmm0
        movdqa  %xmm5,%xmm2
        pandn   256+16(%rsp),%xmm1
        movdqa  %xmm5,%xmm3
        pand    448(%rsp),%xmm2
        pand    448+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3

        movdqa  %xmm4,%xmm0
        movdqa  %xmm4,%xmm1
        pandn   %xmm2,%xmm0
        movdqa  %xmm4,%xmm2
        pandn   %xmm3,%xmm1
        movdqa  %xmm4,%xmm3
        pand    352(%rsp),%xmm2
        pand    352+16(%rsp),%xmm3
        por     %xmm0,%xmm2
        por     %xmm1,%xmm3
        movdqu  %xmm2,32(%rdi)
        movdqu  %xmm3,48(%rdi)

        addq    $480+8,%rsp
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
.size   ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
.section .note.GNU-stack,"",%progbits