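# secure/lib/libcrypto/asm/x86_64-mont.s
#
# bn_mul_mont -- Montgomery multiplication for x86_64, the generated output
# of OpenSSL's x86_64-mont.pl perlasm module (CRYPTOGAMS; see the .byte
# signature at the end of the file).
#
# Expected C prototype (OpenSSL's bn_asm interface):
#   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#                   const BN_ULONG *np, const BN_ULONG *n0, int num);
# Under the SysV AMD64 calling convention the arguments arrive as:
#   %rdi = rp, %rsi = ap, %rdx = bp, %rcx = np, %r8 = &n0, %r9d = num
# The routine computes rp[] = ap[]*bp[]*R^-1 mod np[], R = 2^(64*num),
# using a (num+2)-word scratch vector tp[] carved out of the stack.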
.text

.globl  bn_mul_mont
.type   bn_mul_mont,@function
.align  16
bn_mul_mont:
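# Save the callee-saved registers that the routine uses as scratch.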
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15

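# Zero-extend num and reserve a (num+2)-word scratch vector tp[] on the
# stack, rounding %rsp down to a 1024-byte boundary; %r11 remembers %rsp
# from before the allocation so it can be restored later.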
        movl    %r9d,%r9d
        leaq    2(%r9),%r10
        movq    %rsp,%r11
        negq    %r10
        leaq    (%rsp,%r10,8),%rsp
        andq    $-1024,%rsp

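# Stash the old %rsp at tp[num+1]; the epilogue reloads it from there.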
        movq    %r11,8(%rsp,%r9,8)
.Lprologue:
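# Register roles from here on:
#   %r12 = bp, %r8 = n0 (-np[0]^-1 mod 2^64), %rdi = rp, %rsi = ap,
#   %rcx = np, %r14 = i (outer index), %r15 = j (inner index).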
        movq    %rdx,%r12

        movq    (%r8),%r8

        xorq    %r14,%r14
        xorq    %r15,%r15

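# i = 0: multiply ap[0] by bp[0]; low word in %r10, high word in %r11.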
        movq    (%r12),%rbx
        movq    (%rsi),%rax
        mulq    %rbx
        movq    %rax,%r10
        movq    %rdx,%r11

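# m = lo(ap[0]*bp[0]) * n0 mod 2^64 -- the reduction multiplier for this pass.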
        imulq   %r8,%rax
        movq    %rax,%rbp

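# m*np[0] + lo(ap[0]*bp[0]) is 0 mod 2^64 by construction; only the carry
# word in %r13 survives.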
        mulq    (%rcx)
        addq    %r10,%rax
        adcq    $0,%rdx
        movq    %rdx,%r13

        leaq    1(%r15),%r15
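# First pass (i = 0): for j = 1 .. num-1 accumulate ap[j]*bp[0] (carry in
# %r11) and m*np[j] (carry in %r13); the reduced word is stored to tp[j-1].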
.L1st:
        movq    (%rsi,%r15,8),%rax
        mulq    %rbx
        addq    %r11,%rax
        adcq    $0,%rdx
        movq    %rax,%r10
        movq    (%rcx,%r15,8),%rax
        movq    %rdx,%r11

        mulq    %rbp
        addq    %r13,%rax
        leaq    1(%r15),%r15
        adcq    $0,%rdx
        addq    %r10,%rax
        adcq    $0,%rdx
        movq    %rax,-16(%rsp,%r15,8)
        cmpq    %r9,%r15
        movq    %rdx,%r13
        jl      .L1st

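# Fold the two leftover carry chains into tp[num-1] and tp[num].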
        xorq    %rdx,%rdx
        addq    %r11,%r13
        adcq    $0,%rdx
        movq    %r13,-8(%rsp,%r9,8)
        movq    %rdx,(%rsp,%r9,8)

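# Outer loop: for i = 1 .. num-1 add ap[]*bp[i] into tp[] and reduce.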
        leaq    1(%r14),%r14
.align  4
.Louter:
        xorq    %r15,%r15

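# j = 0: %rbx = bp[i]; add tp[0] into ap[0]*bp[i], derive m for this pass,
# and start the reduction with m*np[0]; %r10 prefetches tp[1].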
        movq    (%r12,%r14,8),%rbx
        movq    (%rsi),%rax
        mulq    %rbx
        addq    (%rsp),%rax
        adcq    $0,%rdx
        movq    %rax,%r10
        movq    %rdx,%r11

        imulq   %r8,%rax
        movq    %rax,%rbp

        mulq    (%rcx,%r15,8)
        addq    %r10,%rax
        movq    8(%rsp),%r10
        adcq    $0,%rdx
        movq    %rdx,%r13

        leaq    1(%r15),%r15
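# Inner loop: for j = 1 .. num-1 compute
#   tp[j-1] = lo(tp[j] + ap[j]*bp[i] + m*np[j]) plus carries;
# %r10 holds tp[j] on entry and prefetches tp[j+1] on the way out.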
.align  4
.Linner:
        movq    (%rsi,%r15,8),%rax
        mulq    %rbx
        addq    %r11,%rax
        adcq    $0,%rdx
        addq    %rax,%r10
        movq    (%rcx,%r15,8),%rax
        adcq    $0,%rdx
        movq    %rdx,%r11

        mulq    %rbp
        addq    %r13,%rax
        leaq    1(%r15),%r15
        adcq    $0,%rdx
        addq    %r10,%rax
        adcq    $0,%rdx
        movq    (%rsp,%r15,8),%r10
        cmpq    %r9,%r15
        movq    %rax,-16(%rsp,%r15,8)
        movq    %rdx,%r13
        jl      .Linner

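# Fold the carries and the old tp[num] into tp[num-1] and tp[num],
# then advance to the next bp[] word.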
        xorq    %rdx,%rdx
        addq    %r11,%r13
        adcq    $0,%rdx
        addq    %r10,%r13
        adcq    $0,%rdx
        movq    %r13,-8(%rsp,%r9,8)
        movq    %rdx,(%rsp,%r9,8)

        leaq    1(%r14),%r14
        cmpq    %r9,%r14
        jl      .Louter

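# tp[] now holds the Montgomery product, possibly still >= np[].
# Subtract np[] from tp[] into rp[], tracking the borrow.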
        leaq    (%rsp),%rsi
        leaq    -1(%r9),%r15

        movq    (%rsi),%rax
        xorq    %r14,%r14
        jmp     .Lsub
.align  16
.Lsub:  sbbq    (%rcx,%r14,8),%rax
        movq    %rax,(%rdi,%r14,8)
        decq    %r15
        movq    8(%rsi,%r14,8),%rax
        leaq    1(%r14),%r14
        jge     .Lsub

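# Propagate the borrow through tp[num] to build a selector mask: keep tp[]
# if the subtraction underflowed, otherwise keep rp[] (= tp - np).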
        sbbq    $0,%rax
        andq    %rax,%rsi
        notq    %rax
        movq    %rdi,%rcx
        andq    %rax,%rcx
        leaq    -1(%r9),%r15
        orq     %rcx,%rsi
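# Copy the selected result into rp[] and overwrite the tp[] scratch words
# so the intermediate product does not linger on the stack.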
.align  16
.Lcopy:
        movq    (%rsi,%r15,8),%rax
        movq    %rax,(%rdi,%r15,8)
        movq    %r14,(%rsp,%r15,8)
        decq    %r15
        jge     .Lcopy

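# Epilogue: recover the pre-allocation %rsp from tp[num+1], restore the
# saved registers and return 1.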
        movq    8(%rsp,%r9,8),%rsi
        movq    $1,%rax
        movq    (%rsi),%r15
        movq    8(%rsi),%r14
        movq    16(%rsi),%r13
        movq    24(%rsi),%r12
        movq    32(%rsi),%rbp
        movq    40(%rsi),%rbx
        leaq    48(%rsi),%rsp
.Lepilogue:
        .byte   0xf3,0xc3
.size   bn_mul_mont,.-bn_mul_mont
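# The .byte string below is the NUL-terminated signature
# "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".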
.byte   77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  16