# bn_mul_mont_gather5: exported function symbol (ELF @function).
# NOTE(review): this text is an excerpt. Each line starts with an embedded
# number and most instructions of the routine are absent from this view, so
# the comments describe only what the visible lines do.
5 .globl bn_mul_mont_gather5
6 .type bn_mul_mont_gather5,@function
# New stack pointer = old %rsp - 264 + 8*%r11.
# NOTE(review): presumably %r11 is negative here so this reserves space -- confirm.
28 leaq -264(%rsp,%r11,8),%rsp
# Save %rax in the slot 8(%rsp,%r9,8); the same slot expression is loaded
# into %rsi by the last instruction visible in this routine.
31 movq %rax,8(%rsp,%r9,8)
# Load the quadword at %rsp+%rax into %r11.
# NOTE(review): looks like a stack probe whose value is not needed -- confirm.
42 movq (%rsp,%rax,1),%r11
# %r10 = %rsp + 8*%r9 + 24 - 112, so 112(%r10) below is 24(%rsp,%r9,8).
50 leaq 24-112(%rsp,%r9,8),%r10
# Sixteen aligned 16-byte stores of %xmm0..%xmm3 (used in rotation) fill the
# 256 bytes at 112(%r10)..367(%r10). The instructions that produce each
# register value between the stores are not visible here.
62 movdqa %xmm0,112(%r10)
67 movdqa %xmm1,128(%r10)
72 movdqa %xmm2,144(%r10)
77 movdqa %xmm3,160(%r10)
81 movdqa %xmm0,176(%r10)
86 movdqa %xmm1,192(%r10)
91 movdqa %xmm2,208(%r10)
96 movdqa %xmm3,224(%r10)
100 movdqa %xmm0,240(%r10)
105 movdqa %xmm1,256(%r10)
110 movdqa %xmm2,272(%r10)
115 movdqa %xmm3,288(%r10)
119 movdqa %xmm0,304(%r10)
124 movdqa %xmm1,320(%r10)
127 movdqa %xmm2,336(%r10)
132 movdqa %xmm3,352(%r10)
# Aligned 16-byte loads from the area addressed by %r12 (offsets -128..48
# visible) into %xmm2..%xmm5. The instructions that consume these values are
# absent from this view.
136 movdqa -128(%r12),%xmm4
137 movdqa -112(%r12),%xmm5
138 movdqa -96(%r12),%xmm2
140 movdqa -80(%r12),%xmm3
148 movdqa -64(%r12),%xmm4
149 movdqa -48(%r12),%xmm5
150 movdqa -32(%r12),%xmm2
152 movdqa -16(%r12),%xmm3
161 movdqa 16(%r12),%xmm5
162 movdqa 32(%r12),%xmm2
164 movdqa 48(%r12),%xmm3
# %xmm1 = %xmm0 with its two 64-bit halves swapped (shuffle control 0x4e).
173 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded instruction; the bytes decode to: movq %xmm0,%rbx
176 .byte 102,72,15,126,195
# Fragments indexed by %r15 in 8-byte steps: quadwords are read from the
# arrays at %rsi and %rcx, and %r13 is stored two quadword slots below index
# %r15 in the buffer at %rsp.
204 movq (%rsi,%r15,8),%rax
209 movq %r13,-16(%rsp,%r15,8)
215 movq (%rcx,%r15,8),%rax
# Stores relative to index %r9 of the same buffer: slots %r9-2, %r9-1 and %r9.
229 movq %r13,-16(%rsp,%r9,8)
236 movq %r13,-8(%rsp,%r9,8)
237 movq %rdx,(%rsp,%r9,8)
# %rdx = %rsp + 8*%r9 + 24 + 128, i.e. 128 bytes past the start of the
# 256-byte area written by the sixteen stores above, so -128(%rdx) is that
# area's first 16 bytes.
243 leaq 24+128(%rsp,%r9,8),%rdx
# Load 16-byte entries from the %r12 area and AND them with the matching
# 16-byte entries of the stack area (only the first two pand are visible).
247 movdqa -128(%r12),%xmm0
248 movdqa -112(%r12),%xmm1
249 movdqa -96(%r12),%xmm2
250 movdqa -80(%r12),%xmm3
251 pand -128(%rdx),%xmm0
252 pand -112(%rdx),%xmm1
259 movdqa -64(%r12),%xmm0
260 movdqa -48(%r12),%xmm1
261 movdqa -32(%r12),%xmm2
262 movdqa -16(%r12),%xmm3
272 movdqa 16(%r12),%xmm1
273 movdqa 32(%r12),%xmm2
274 movdqa 48(%r12),%xmm3
283 movdqa 64(%r12),%xmm0
284 movdqa 80(%r12),%xmm1
285 movdqa 96(%r12),%xmm2
286 movdqa 112(%r12),%xmm3
# %xmm0 = %xmm4 with its two 64-bit halves swapped.
296 pshufd $0x4e,%xmm4,%xmm0
# Hand-encoded instruction; the bytes decode to: movq %xmm0,%rbx
301 .byte 102,72,15,126,195
# Same access pattern as the earlier %r15-indexed fragments, but the buffer
# at %rsp is also read back into %r10 before slots are overwritten.
328 movq (%rsi,%r15,8),%rax
331 movq (%rsp,%r15,8),%r10
333 movq %r13,-16(%rsp,%r15,8)
339 movq (%rcx,%r15,8),%rax
353 movq (%rsp,%r9,8),%r10
355 movq %r13,-16(%rsp,%r9,8)
363 movq %r13,-8(%rsp,%r9,8)
364 movq %rdx,(%rsp,%r9,8)
# .Lsub: subtract-with-borrow of the quadword at (%rcx,%r14,8) from %rax,
# store the result to (%rdi,%r14,8), then load the next quadword from %rsi.
376 .Lsub: sbbq (%rcx,%r14,8),%rax
377 movq %rax,(%rdi,%r14,8)
378 movq 8(%rsi,%r14,8),%rax
# Copy one quadword from (%rsi,%r14,8) to (%rdi,%r14,8); in between, the
# stack slot with the same index is overwritten with the value of %r14.
# NOTE(review): presumably this wipes the temporary buffer -- confirm.
393 movq (%rsi,%r14,8),%rax
394 movq %r14,(%rsp,%r14,8)
395 movq %rax,(%rdi,%r14,8)
# Reload into %rsi the value stored at 8(%rsp,%r9,8) near the top.
400 movq 8(%rsp,%r9,8),%rsi
412 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5: function symbol; no .globl directive for it is
# visible in this view.
# NOTE(review): only address computations and one probe-style load are
# visible; the rest of the routine is absent from this excerpt.
413 .type bn_mul4x_mont_gather5,@function
415 bn_mul4x_mont_gather5:
# %r10 = 3 * %r9.
428 leaq (%r9,%r9,2),%r10
# %r11 = %rsp - 320 + 2*%r9.
440 leaq -320(%rsp,%r9,2),%r11
# The same expression is applied to %rsp itself at two places.
# NOTE(review): presumably these sit on alternative paths and %r9 has been
# negated beforehand so the frame grows downward -- confirm.
446 leaq -320(%rsp,%r9,2),%rsp
# %r10 = 2*%r9 + 4096 - 320 (no base register).
451 leaq 4096-320(,%r9,2),%r10
452 leaq -320(%rsp,%r9,2),%rsp
# Load the quadword at %rsp+%r11 into %r10 and branch back while carry is
# clear; the .Lmul4x_page_walk label itself is not visible in this view.
463 movq (%rsp,%r11,1),%r10
466 jnc .Lmul4x_page_walk
487 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# mul4x_internal: function symbol; its entry label and most of its
# instructions are absent from this excerpt. Comments cover visible lines only.
489 .type mul4x_internal,@function
# %rax = address of .Linc (RIP-relative); .Linc is defined outside this view.
494 leaq .Linc(%rip),%rax
# %r13 = %rdx + %r9 + 128.
495 leaq 128(%rdx,%r9,1),%r13
# Aligned load of the 16 bytes at .Linc+16.
498 movdqa 16(%rax),%xmm1
# %r10 = %rsp + %r9 + 88 - 112, so 112(%r10) below is 88(%rsp,%r9,1).
499 leaq 88-112(%rsp,%r9,1),%r10
# Broadcast the low 32-bit lane of %xmm5 to all four lanes.
502 pshufd $0,%xmm5,%xmm5
# Sixteen aligned 16-byte stores of %xmm0..%xmm3 (used in rotation) fill the
# 256 bytes at 112(%r10)..367(%r10). The instructions that produce each
# register value between the stores are not visible here.
512 movdqa %xmm0,112(%r10)
517 movdqa %xmm1,128(%r10)
522 movdqa %xmm2,144(%r10)
527 movdqa %xmm3,160(%r10)
531 movdqa %xmm0,176(%r10)
536 movdqa %xmm1,192(%r10)
541 movdqa %xmm2,208(%r10)
546 movdqa %xmm3,224(%r10)
550 movdqa %xmm0,240(%r10)
555 movdqa %xmm1,256(%r10)
560 movdqa %xmm2,272(%r10)
565 movdqa %xmm3,288(%r10)
569 movdqa %xmm0,304(%r10)
574 movdqa %xmm1,320(%r10)
577 movdqa %xmm2,336(%r10)
582 movdqa %xmm3,352(%r10)
# Aligned 16-byte loads from the area addressed by %r12 (offsets -128..48
# visible) into %xmm2..%xmm5; their consumers are absent from this view.
586 movdqa -128(%r12),%xmm4
587 movdqa -112(%r12),%xmm5
588 movdqa -96(%r12),%xmm2
590 movdqa -80(%r12),%xmm3
598 movdqa -64(%r12),%xmm4
599 movdqa -48(%r12),%xmm5
600 movdqa -32(%r12),%xmm2
602 movdqa -16(%r12),%xmm3
611 movdqa 16(%r12),%xmm5
612 movdqa 32(%r12),%xmm2
614 movdqa 48(%r12),%xmm3
# %xmm1 = %xmm0 with its two 64-bit halves swapped (shuffle control 0x4e).
623 pshufd $0x4e,%xmm0,%xmm1
# Hand-encoded instruction; the bytes decode to: movq %xmm0,%rbx
626 .byte 102,72,15,126,195
# Advance %rsi by %r9 bytes; the loads that follow address quadwords
# relative to the advanced %rsi using %r9 or %r15 as a byte offset.
633 leaq (%rsi,%r9,1),%rsi
647 movq 8(%rsi,%r9,1),%rax
659 movq 16(%rsi,%r9,1),%rax
680 movq -8(%rsi,%r15,1),%rax
695 movq (%rsi,%r15,1),%rax
710 movq 8(%rsi,%r15,1),%rax
725 movq 16(%rsi,%r15,1),%rax
760 movq (%rsi,%r9,1),%rax
# Advance %rcx by %r9 bytes.
767 leaq (%rcx,%r9,1),%rcx
# %rdx = %r14 + 16 + 128, so -128(%rdx) below is 16(%r14).
778 leaq 16+128(%r14),%rdx
# Load 16-byte entries from the %r12 area and AND them with 16-byte entries
# addressed through %rdx (only the first two pand are visible).
781 movdqa -128(%r12),%xmm0
782 movdqa -112(%r12),%xmm1
783 movdqa -96(%r12),%xmm2
784 movdqa -80(%r12),%xmm3
785 pand -128(%rdx),%xmm0
786 pand -112(%rdx),%xmm1
793 movdqa -64(%r12),%xmm0
794 movdqa -48(%r12),%xmm1
795 movdqa -32(%r12),%xmm2
796 movdqa -16(%r12),%xmm3
806 movdqa 16(%r12),%xmm1
807 movdqa 32(%r12),%xmm2
808 movdqa 48(%r12),%xmm3
817 movdqa 64(%r12),%xmm0
818 movdqa 80(%r12),%xmm1
819 movdqa 96(%r12),%xmm2
820 movdqa 112(%r12),%xmm3
# %xmm0 = %xmm4 with its two 64-bit halves swapped.
830 pshufd $0x4e,%xmm4,%xmm0
# Hand-encoded instruction; the bytes decode to: movq %xmm0,%rbx
833 .byte 102,72,15,126,195
# Read the quadword at %r14+%r9, then advance %r14 by %r9 bytes.
835 movq (%r14,%r9,1),%r10
846 leaq (%r14,%r9,1),%r14
# Same %rsi-relative load pattern as the earlier group above.
850 movq 8(%rsi,%r9,1),%rax
864 movq 16(%rsi,%r9,1),%rax
886 movq -8(%rsi,%r15,1),%rax
903 movq (%rsi,%r15,1),%rax
920 movq 8(%rsi,%r15,1),%rax
937 movq 16(%rsi,%r15,1),%rax
977 movq (%rsi,%r9,1),%rax
985 leaq (%rcx,%r9,1),%rcx
# %rbx = %r14 + %r9; %rdi is reloaded from the stack slot at 64(%rsp).
1001 leaq (%r14,%r9,1),%rbx
1006 movq 56+8(%rsp),%rdi
# Transfer control to .Lsqr4x_sub_entry, a label not visible in this view.
1012 jmp .Lsqr4x_sub_entry
1013 .size mul4x_internal,.-mul4x_internal
# bn_power5: function symbol; its entry label and most of its instructions
# are absent from this excerpt. Comments cover visible lines only.
1015 .type bn_power5,@function
# 32-bit form: %r10d = 3 * %r9d (writing %r10d zero-extends into %r10).
1027 leal (%r9,%r9,2),%r10d
# %r11 = %rsp - 320 + 2*%r9.
1038 leaq -320(%rsp,%r9,2),%r11
# The same expression is applied to %rsp itself at two places.
# NOTE(review): presumably alternative paths with %r9 negated beforehand -- confirm.
1044 leaq -320(%rsp,%r9,2),%rsp
# %r10 = 2*%r9 + 4096 - 320 (no base register).
1049 leaq 4096-320(,%r9,2),%r10
1050 leaq -320(%rsp,%r9,2),%rsp
# Load the quadword at %rsp+%r11 into %r10.
# NOTE(review): looks like a stack probe -- confirm.
1061 movq (%rsp,%r11,1),%r10
# Hand-encoded instructions that copy general registers into SSE registers;
# the bytes decode to, in order:
#   movq %rdi,%xmm1
#   movq %rcx,%xmm2
#   movq %r10,%xmm3
#   movq %rdx,%xmm4
1081 .byte 102,72,15,110,207
1082 .byte 102,72,15,110,209
1083 .byte 102,73,15,110,218
1084 .byte 102,72,15,110,226
# Five back-to-back pairs of calls: __bn_sqr8x_internal followed by
# __bn_post4x_internal.
1086 call __bn_sqr8x_internal
1087 call __bn_post4x_internal
1088 call __bn_sqr8x_internal
1089 call __bn_post4x_internal
1090 call __bn_sqr8x_internal
1091 call __bn_post4x_internal
1092 call __bn_sqr8x_internal
1093 call __bn_post4x_internal
1094 call __bn_sqr8x_internal
1095 call __bn_post4x_internal
# Hand-encoded instructions that restore two of the registers parked above;
# the bytes decode to: movq %xmm2,%rcx  and  movq %xmm4,%rdx
1097 .byte 102,72,15,126,209
1098 .byte 102,72,15,126,226
1116 .size bn_power5,.-bn_power5
# bn_sqr8x_internal: exported but hidden-visibility function symbol. The
# visible entry label is __bn_sqr8x_internal, which is the name used by the
# call sites in this file; a second entry label, __bn_sqr8x_reduction,
# appears further down inside the same .size span.
# NOTE(review): a plain bn_sqr8x_internal label is not visible in this
# excerpt -- presumably it sits on a line missing from this view; confirm.
# Most instructions of the routine are absent; comments cover visible lines only.
1118 .globl bn_sqr8x_internal
1119 .hidden bn_sqr8x_internal
1120 .type bn_sqr8x_internal,@function
1123 __bn_sqr8x_internal:
# Advance %rsi by %r9 bytes.
1198 leaq (%rsi,%r9,1),%rsi
# Load three quadwords at -32/-24/-16(%rsi,%rbp,1) into %r14, %rax, %rbx.
# Interleaved: %rdi = %rsp + 2*%r9 + 56, then %rdi = %rdi + %rbp - 32.
1203 movq -32(%rsi,%rbp,1),%r14
1204 leaq 48+8(%rsp,%r9,2),%rdi
1205 movq -24(%rsi,%rbp,1),%rax
1206 leaq -32(%rdi,%rbp,1),%rdi
1207 movq -16(%rsi,%rbp,1),%rbx
# Store %r10 and %r11 to the %rdi buffer (byte offset %rbp, later %rcx)
# and fetch further quadwords from the %rsi array into %rbx.
1214 movq %r10,-24(%rdi,%rbp,1)
1220 movq %r11,-16(%rdi,%rbp,1)
1224 movq -8(%rsi,%rbp,1),%rbx
1238 movq %r10,-8(%rdi,%rcx,1)
1243 movq (%rsi,%rcx,1),%rbx
1253 movq 8(%rsi,%rcx,1),%rbx
1263 movq %r11,(%rdi,%rcx,1)
1270 movq 16(%rsi,%rcx,1),%rbx
1279 movq %r10,8(%rdi,%rcx,1)
1286 movq 24(%rsi,%rcx,1),%rbx
1296 movq %r11,16(%rdi,%rcx,1)
1308 movq %r10,-8(%rdi,%rcx,1)
# Same pointer setup again; in this part values already in the %rdi buffer
# are read or added into registers (movq/addq from memory) before the
# registers are stored back.
1327 movq -32(%rsi,%rbp,1),%r14
1328 leaq 48+8(%rsp,%r9,2),%rdi
1329 movq -24(%rsi,%rbp,1),%rax
1330 leaq -32(%rdi,%rbp,1),%rdi
1331 movq -16(%rsi,%rbp,1),%rbx
1335 movq -24(%rdi,%rbp,1),%r10
1339 movq %r10,-24(%rdi,%rbp,1)
1346 addq -16(%rdi,%rbp,1),%r11
1349 movq %r11,-16(%rdi,%rbp,1)
1353 movq -8(%rsi,%rbp,1),%rbx
1358 addq -8(%rdi,%rbp,1),%r12
1369 movq %r10,-8(%rdi,%rbp,1)
1376 movq (%rsi,%rcx,1),%rbx
1382 addq (%rdi,%rcx,1),%r13
1389 movq 8(%rsi,%rcx,1),%rbx
1397 movq %r11,(%rdi,%rcx,1)
1401 addq 8(%rdi,%rcx,1),%r12
1412 movq %r10,-8(%rdi,%rcx,1)
# Recompute %rdi = %rsp + 2*%r9 + 56 + %rbp - 32.
1433 leaq 48+8(%rsp,%r9,2),%rdi
1435 leaq -32(%rdi,%rbp,1),%rdi
# From here %rdi = %rsp + 56. The recurring leaq pairs compute
# %r14 + 2*%r10 (into %r12 or %rbx) and %rcx + 2*%r11 (into %r13 or %r8);
# the movq lines fetch successive quadwords of the %rsi array into %rax.
# NOTE(review): the label name suggests a doubling ("shift") step merged
# with an addition -- confirm against the full source.
1494 movq -16(%rsi,%rbp,1),%rax
1495 leaq 48+8(%rsp),%rdi
1499 leaq (%r14,%r10,2),%r12
1501 leaq (%rcx,%r11,2),%r13
1510 movq -8(%rsi,%rbp,1),%rax
1514 leaq (%r14,%r10,2),%rbx
1518 leaq (%rcx,%r11,2),%r8
1527 movq 0(%rsi,%rbp,1),%rax
1534 jmp .Lsqr4x_shift_n_add
# Loop head; the jnz further down branches back here.
1537 .Lsqr4x_shift_n_add:
1538 leaq (%r14,%r10,2),%r12
1540 leaq (%rcx,%r11,2),%r13
1549 movq -8(%rsi,%rbp,1),%rax
1553 leaq (%r14,%r10,2),%rbx
1557 leaq (%rcx,%r11,2),%r8
1566 movq 0(%rsi,%rbp,1),%rax
1570 leaq (%r14,%r10,2),%r12
1574 leaq (%rcx,%r11,2),%r13
1583 movq 8(%rsi,%rbp,1),%rax
1587 leaq (%r14,%r10,2),%rbx
1591 leaq (%rcx,%r11,2),%r8
1600 movq 16(%rsi,%rbp,1),%rax
1607 jnz .Lsqr4x_shift_n_add
# Straight-line code after the loop repeats the same two address forms.
1609 leaq (%r14,%r10,2),%r12
1612 leaq (%rcx,%r11,2),%r13
1625 leaq (%r14,%r10,2),%rbx
1629 leaq (%rcx,%r11,2),%r8
# Hand-encoded instruction; the bytes decode to: movq %xmm2,%rbp
1638 .byte 102,72,15,126,213
# Second entry point into this routine; a call to this label is visible
# elsewhere in the file.
1639 __bn_sqr8x_reduction:
# %rcx = %r9 + %rbp; %rdx = %rsp + 2*%r9 + 56; %rdi = %rsp + %r9 + 56.
1641 leaq (%r9,%rbp,1),%rcx
1642 leaq 48+8(%rsp,%r9,2),%rdx
1644 leaq 48+8(%rsp,%r9,1),%rdi
1647 jmp .L8x_reduction_loop
# Loop head; the jb at the end branches back here.
1650 .L8x_reduction_loop:
# Advance %rdi by %r9 bytes.
1651 leaq (%rdi,%r9,1),%rdi
# Multiply %rbx by the quadword stored at 40(%rsp) (low 64 bits kept).
1666 imulq 32+8(%rsp),%rbx
# Save %rbx at 48(%rsp,%rcx,8).
1684 movq %rbx,48-8+8(%rsp,%rcx,8)
# Load the same 40(%rsp) quadword into %rsi.
1693 movq 32+8(%rsp),%rsi
# Reload %rbx from the fixed slot 112(%rsp).
1759 movq 48+56+8(%rsp),%rbx
# Reload %rbx from 40(%rsp,%rcx,8).
1823 movq 48-16+8(%rsp,%rcx,8),%rbx
1839 movq 48+56+8(%rsp),%rbx
# Hand-encoded instructions; the bytes decode to:
#   movq %xmm2,%rbp
#   movq %xmm3,%r9
1883 .byte 102,72,15,126,213
1887 .byte 102,73,15,126,217
# Branch back while the preceding comparison (not visible here) set carry.
1897 jb .L8x_reduction_loop
1899 .size bn_sqr8x_internal,.-bn_sqr8x_internal
# __bn_post4x_internal: function symbol; several call sites that target it
# are visible elsewhere in this file. Only a few of its instructions appear
# in this excerpt.
1900 .type __bn_post4x_internal,@function
1902 __bn_post4x_internal:
# %rbx = %rdi + %r9.
1904 leaq (%rdi,%r9,1),%rbx
# Hand-encoded instructions; the bytes decode to:
#   movq %xmm1,%rdi
#   movq %xmm1,%rsi
1906 .byte 102,72,15,126,207
1908 .byte 102,72,15,126,206
# Transfer control to .Lsqr4x_sub_entry, a label not visible in this view.
1915 jmp .Lsqr4x_sub_entry
1953 .size __bn_post4x_internal,.-__bn_post4x_internal
# bn_from_montgomery: exported function symbol. Only its .globl/.type/.size
# directives are visible in this excerpt; the entry label and body are on
# lines absent from this view.
1954 .globl bn_from_montgomery
1955 .type bn_from_montgomery,@function
1962 .size bn_from_montgomery,.-bn_from_montgomery
# bn_from_mont8x: function symbol; its entry label and many instructions are
# absent from this excerpt. Comments cover visible lines only.
1964 .type bn_from_mont8x,@function
# %r10 = 3 * %r9.
1977 leaq (%r9,%r9,2),%r10
# %r11 = %rsp - 320 + 2*%r9.
1988 leaq -320(%rsp,%r9,2),%r11
# The same expression is applied to %rsp itself at two places.
# NOTE(review): presumably alternative paths with %r9 negated beforehand -- confirm.
1994 leaq -320(%rsp,%r9,2),%rsp
# %r10 = 2*%r9 + 4096 - 320 (no base register).
1999 leaq 4096-320(,%r9,2),%r10
2000 leaq -320(%rsp,%r9,2),%rsp
# Load the quadword at %rsp+%r11 into %r10 and branch back while carry is
# clear; the .Lfrom_page_walk label itself is not visible in this view.
2011 movq (%rsp,%r11,1),%r10
2014 jnc .Lfrom_page_walk
# Copy fragment: unaligned 16-byte loads from 16/32/48(%rsi) go to aligned
# stores at 16/32/48(%rax), interleaved with four aligned stores of %xmm0 at
# 0..48(%rax,%r9,1). The load and store for offset 0 are not visible here.
# NOTE(review): %xmm0 presumably holds zero at this point -- confirm.
2039 movdqu 16(%rsi),%xmm2
2040 movdqu 32(%rsi),%xmm3
2041 movdqa %xmm0,(%rax,%r9,1)
2042 movdqu 48(%rsi),%xmm4
2043 movdqa %xmm0,16(%rax,%r9,1)
# Hand-encoded instruction; the bytes decode to: leaq 64(%rsi),%rsi
# (the 32-bit displacement form).
2044 .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2046 movdqa %xmm0,32(%rax,%r9,1)
2047 movdqa %xmm2,16(%rax)
2048 movdqa %xmm0,48(%rax,%r9,1)
2049 movdqa %xmm3,32(%rax)
2050 movdqa %xmm4,48(%rax)
# Hand-encoded instructions; the bytes decode to, in order:
#   movq %rdi,%xmm1
#   movq %rcx,%xmm2
#   movq %r10,%xmm3
2055 .byte 102,72,15,110,207
2056 .byte 102,72,15,110,209
2059 .byte 102,73,15,110,218
# Enter the squaring routine at its reduction entry point, then run the
# post-processing routine.
2060 call __bn_sqr8x_reduction
2061 call __bn_post4x_internal
2066 jmp .Lfrom_mont_zero
# Four aligned stores of %xmm0 cover 64 bytes at %rax; the jnz repeats via
# .Lfrom_mont_zero, whose label line is not visible in this view.
2070 movdqa %xmm0,0(%rax)
2071 movdqa %xmm0,16(%rax)
2072 movdqa %xmm0,32(%rax)
2073 movdqa %xmm0,48(%rax)
2076 jnz .Lfrom_mont_zero
2088 .size bn_from_mont8x,.-bn_from_mont8x
# bn_get_bits5: function symbol; only one of its instructions is visible in
# this excerpt.
2090 .type bn_get_bits5,@function
# Zero-extending 16-bit load from %r10 + 2*%rsi into %eax.
2102 movzwl (%r10,%rsi,2),%eax
2106 .size bn_get_bits5,.-bn_get_bits5
# bn_scatter5: function symbol; only two of its instructions are visible in
# this excerpt.
2109 .type bn_scatter5,@function
# Leave through .Lscatter_epilogue when the preceding flag-setting
# instruction (not visible here) produced zero.
2113 jz .Lscatter_epilogue
# %rdx = %rdx + 8*%rcx.
2114 leaq (%rdx,%rcx,8),%rdx
2124 .size bn_scatter5,.-bn_scatter5
# bn_gather5: function symbol, bracketed by .LSEH_begin/.LSEH_end labels.
# Its entry label and many instructions are absent from this excerpt;
# comments cover visible lines only.
2127 .type bn_gather5,@function
2130 .LSEH_begin_bn_gather5:
# Hand-encoded prologue; the bytes decode to:
#   leaq (%rsp),%r10
#   subq $264,%rsp
2132 .byte 0x4c,0x8d,0x14,0x24
2133 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
# %rax = address of .Linc (RIP-relative); .Linc is defined outside this view.
2134 leaq .Linc(%rip),%rax
# Aligned loads of the 16-byte constants at .Linc and .Linc+16.
2138 movdqa 0(%rax),%xmm0
2139 movdqa 16(%rax),%xmm1
# Broadcast the low 32-bit lane of %xmm5 to all four lanes.
2143 pshufd $0,%xmm5,%xmm5
# Sixteen aligned 16-byte stores of %xmm0..%xmm3 (used in rotation) fill the
# 256 bytes at -128(%rax)..127(%rax).
# NOTE(review): %rax must have been repointed at writable memory between the
# .Linc loads and these stores; that instruction is not visible here -- confirm.
2152 movdqa %xmm0,-128(%rax)
2157 movdqa %xmm1,-112(%rax)
2162 movdqa %xmm2,-96(%rax)
2166 movdqa %xmm3,-80(%rax)
2171 movdqa %xmm0,-64(%rax)
2176 movdqa %xmm1,-48(%rax)
2181 movdqa %xmm2,-32(%rax)
2185 movdqa %xmm3,-16(%rax)
2190 movdqa %xmm0,0(%rax)
2195 movdqa %xmm1,16(%rax)
2200 movdqa %xmm2,32(%rax)
2204 movdqa %xmm3,48(%rax)
2209 movdqa %xmm0,64(%rax)
2214 movdqa %xmm1,80(%rax)
2219 movdqa %xmm2,96(%rax)
2221 movdqa %xmm3,112(%rax)
# Load 16-byte entries from the area addressed by %r11 and AND each one with
# the entry at the same offset in the %rax area written above. Only some of
# the pand instructions are visible in this excerpt.
2228 movdqa -128(%r11),%xmm0
2229 movdqa -112(%r11),%xmm1
2230 movdqa -96(%r11),%xmm2
2231 pand -128(%rax),%xmm0
2232 movdqa -80(%r11),%xmm3
2233 pand -112(%rax),%xmm1
2235 pand -96(%rax),%xmm2
2237 pand -80(%rax),%xmm3
2240 movdqa -64(%r11),%xmm0
2241 movdqa -48(%r11),%xmm1
2242 movdqa -32(%r11),%xmm2
2243 pand -64(%rax),%xmm0
2244 movdqa -16(%r11),%xmm3
2245 pand -48(%rax),%xmm1
2247 pand -32(%rax),%xmm2
2249 pand -16(%rax),%xmm3
2252 movdqa 0(%r11),%xmm0
2253 movdqa 16(%r11),%xmm1
2254 movdqa 32(%r11),%xmm2
2256 movdqa 48(%r11),%xmm3
2264 movdqa 64(%r11),%xmm0
2265 movdqa 80(%r11),%xmm1
2266 movdqa 96(%r11),%xmm2
2268 movdqa 112(%r11),%xmm3
2273 pand 112(%rax),%xmm3
# %xmm0 = %xmm4 with its two 64-bit halves swapped (shuffle control 0x4e).
2278 pshufd $0x4e,%xmm4,%xmm0
2287 .LSEH_end_bn_gather5:
2288 .size bn_gather5,.-bn_gather5
# NUL-terminated ASCII identification string embedded in the object; the
# bytes spell:
#   "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
2293 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
# Empty .note.GNU-stack section with no flags (in particular no "x" flag);
# by GNU toolchain convention this marks the object as not requiring an
# executable stack.
2294 .section .note.GNU-stack,"",%progbits