# dragonfly.git: secure/lib/libcrypto/asm/aesni-gcm-x86_64.s
.text

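# _aesni_ctr32_ghash_6x: shared inner loop for the GCM entry points below.
# Each iteration runs the AES rounds for six counter blocks while folding six
# earlier data blocks into the GHASH accumulator ("stitched" CTR + GHASH).
# As set up by the callers: %rcx = key schedule (biased by 128), %r8 = counter
# block, %r9 = Xi/hash-key table (biased by 64), %rdi/%rsi = in/out,
# %rdx = remaining 16-byte blocks, %ebp = AES round count, %r11 = constant
# pool, %ebx = last 32-bit word of the counter, %r10 = bytes processed so far.
# This file is perlasm output (CRYPTOGAMS aesni-gcm-x86_64.pl); the comments
# are descriptive annotations only.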
.type   _aesni_ctr32_ghash_6x,@function
.align  32
_aesni_ctr32_ghash_6x:
        vmovdqu 32(%r11),%xmm2
        subq    $6,%rdx
        vpxor   %xmm4,%xmm4,%xmm4
        vmovdqu 0-128(%rcx),%xmm15
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpaddb  %xmm2,%xmm11,%xmm12
        vpaddb  %xmm2,%xmm12,%xmm13
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm15,%xmm1,%xmm9
        vmovdqu %xmm4,16+8(%rsp)
        jmp     .Loop6x

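# Main 6-block loop.  0x06000000 (= 6<<24, i.e. 100663296) is added to the
# counter's last 32-bit word: that word is big-endian in memory, so its
# least-significant byte sits in the top byte of %ebx.  A carry means the
# byte-wide vpaddb increments would wrap, and .Lhandle_ctr32 must redo them
# as full 32-bit big-endian additions; otherwise the fast path continues.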
.align  32
.Loop6x:
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32
        vmovdqu 0-32(%r9),%xmm3
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm15,%xmm10,%xmm10
        vpxor   %xmm15,%xmm11,%xmm11

.Lresume_ctr32:
        vmovdqu %xmm1,(%r8)
        vpclmulqdq      $0x10,%xmm3,%xmm7,%xmm5
        vpxor   %xmm15,%xmm12,%xmm12
        vmovups 16-128(%rcx),%xmm2
        vpclmulqdq      $0x01,%xmm3,%xmm7,%xmm6
        xorq    %r12,%r12
        cmpq    %r14,%r15

        vaesenc %xmm2,%xmm9,%xmm9
        vmovdqu 48+8(%rsp),%xmm0
        vpxor   %xmm15,%xmm13,%xmm13
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm1
        vaesenc %xmm2,%xmm10,%xmm10
        vpxor   %xmm15,%xmm14,%xmm14
        setnc   %r12b
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vaesenc %xmm2,%xmm11,%xmm11
        vmovdqu 16-32(%r9),%xmm3
        negq    %r12
        vaesenc %xmm2,%xmm12,%xmm12
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm3,%xmm0,%xmm5
        vpxor   %xmm4,%xmm8,%xmm8
        vaesenc %xmm2,%xmm13,%xmm13
        vpxor   %xmm5,%xmm1,%xmm4
        andq    $0x60,%r12
        vmovups 32-128(%rcx),%xmm15
        vpclmulqdq      $0x10,%xmm3,%xmm0,%xmm1
        vaesenc %xmm2,%xmm14,%xmm14

        vpclmulqdq      $0x01,%xmm3,%xmm0,%xmm2
        leaq    (%r14,%r12,1),%r14
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x11,%xmm3,%xmm0,%xmm3
        vmovdqu 64+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  88(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  80(%r14),%r12
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,32+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,40+8(%rsp)
        vmovdqu 48-32(%r9),%xmm5
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 48-128(%rcx),%xmm15
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm5,%xmm0,%xmm1
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm5,%xmm0,%xmm2
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm5,%xmm0,%xmm3
        vaesenc %xmm15,%xmm11,%xmm11
        vpclmulqdq      $0x11,%xmm5,%xmm0,%xmm5
        vmovdqu 80+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   %xmm1,%xmm4,%xmm4
        vmovdqu 64-32(%r9),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 64-128(%rcx),%xmm15
        vpxor   %xmm2,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm1,%xmm0,%xmm2
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm1,%xmm0,%xmm3
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  72(%r14),%r13
        vpxor   %xmm5,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm1,%xmm0,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  64(%r14),%r12
        vpclmulqdq      $0x11,%xmm1,%xmm0,%xmm1
        vmovdqu 96+8(%rsp),%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,48+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,56+8(%rsp)
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 96-32(%r9),%xmm2
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 80-128(%rcx),%xmm15
        vpxor   %xmm3,%xmm6,%xmm6
        vpclmulqdq      $0x00,%xmm2,%xmm0,%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm2,%xmm0,%xmm5
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  56(%r14),%r13
        vpxor   %xmm1,%xmm7,%xmm7
        vpclmulqdq      $0x01,%xmm2,%xmm0,%xmm1
        vpxor   112+8(%rsp),%xmm8,%xmm8
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  48(%r14),%r12
        vpclmulqdq      $0x11,%xmm2,%xmm0,%xmm2
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,64+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,72+8(%rsp)
        vpxor   %xmm3,%xmm4,%xmm4
        vmovdqu 112-32(%r9),%xmm3
        vaesenc %xmm15,%xmm14,%xmm14

        vmovups 96-128(%rcx),%xmm15
        vpxor   %xmm5,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm5
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm1,%xmm6,%xmm6
        vpclmulqdq      $0x01,%xmm3,%xmm8,%xmm1
        vaesenc %xmm15,%xmm10,%xmm10
        movbeq  40(%r14),%r13
        vpxor   %xmm2,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm3,%xmm8,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  32(%r14),%r12
        vpclmulqdq      $0x11,%xmm3,%xmm8,%xmm8
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r13,80+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        movq    %r12,88+8(%rsp)
        vpxor   %xmm5,%xmm6,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   %xmm1,%xmm6,%xmm6

        vmovups 112-128(%rcx),%xmm15
        vpslldq $8,%xmm6,%xmm5
        vpxor   %xmm2,%xmm4,%xmm4
        vmovdqu 16(%r11),%xmm3

        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   %xmm8,%xmm7,%xmm7
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   %xmm5,%xmm4,%xmm4
        movbeq  24(%r14),%r13
        vaesenc %xmm15,%xmm11,%xmm11
        movbeq  16(%r14),%r12
        vpalignr        $8,%xmm4,%xmm4,%xmm0
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        movq    %r13,96+8(%rsp)
        vaesenc %xmm15,%xmm12,%xmm12
        movq    %r12,104+8(%rsp)
        vaesenc %xmm15,%xmm13,%xmm13
        vmovups 128-128(%rcx),%xmm1
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vmovups 144-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm10,%xmm10
        vpsrldq $8,%xmm6,%xmm6
        vaesenc %xmm1,%xmm11,%xmm11
        vpxor   %xmm6,%xmm7,%xmm7
        vaesenc %xmm1,%xmm12,%xmm12
        vpxor   %xmm0,%xmm4,%xmm4
        movbeq  8(%r14),%r13
        vaesenc %xmm1,%xmm13,%xmm13
        movbeq  0(%r14),%r12
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 160-128(%rcx),%xmm1
        cmpl    $11,%ebp
        jb      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 176-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 192-128(%rcx),%xmm1
        je      .Lenc_tail

        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14

        vaesenc %xmm1,%xmm9,%xmm9
        vaesenc %xmm1,%xmm10,%xmm10
        vaesenc %xmm1,%xmm11,%xmm11
        vaesenc %xmm1,%xmm12,%xmm12
        vaesenc %xmm1,%xmm13,%xmm13
        vmovups 208-128(%rcx),%xmm15
        vaesenc %xmm1,%xmm14,%xmm14
        vmovups 224-128(%rcx),%xmm1
        jmp     .Lenc_tail

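# Slow path for counter wrap-around: byte-swap the counter to little-endian,
# do proper 32-bit increments (+1 via 64(%r11) = .Lone_lsb, then repeated +2
# via 48(%r11) = .Ltwo_lsb), swap back, and rejoin the main loop.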
.align  32
.Lhandle_ctr32:
        vmovdqu (%r11),%xmm0
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vmovdqu 0-32(%r9),%xmm3
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm15,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm15,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpshufb %xmm0,%xmm14,%xmm14
        vpshufb %xmm0,%xmm1,%xmm1
        jmp     .Lresume_ctr32

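# Tail of an iteration: run the remaining vaesenc round, xor each input block
# into the last round key (%xmm1) so that vaesenclast yields the output
# directly, and prepare the next six counter blocks in the same pass.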
.align  32
.Lenc_tail:
        vaesenc %xmm15,%xmm9,%xmm9
        vmovdqu %xmm7,16+8(%rsp)
        vpalignr        $8,%xmm4,%xmm4,%xmm8
        vaesenc %xmm15,%xmm10,%xmm10
        vpclmulqdq      $0x10,%xmm3,%xmm4,%xmm4
        vpxor   0(%rdi),%xmm1,%xmm2
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   16(%rdi),%xmm1,%xmm0
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   32(%rdi),%xmm1,%xmm5
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   48(%rdi),%xmm1,%xmm6
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   64(%rdi),%xmm1,%xmm7
        vpxor   80(%rdi),%xmm1,%xmm3
        vmovdqu (%r8),%xmm1

        vaesenclast     %xmm2,%xmm9,%xmm9
        vmovdqu 32(%r11),%xmm2
        vaesenclast     %xmm0,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm1,%xmm0
        movq    %r13,112+8(%rsp)
        leaq    96(%rdi),%rdi
        vaesenclast     %xmm5,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm0,%xmm5
        movq    %r12,120+8(%rsp)
        leaq    96(%rsi),%rsi
        vmovdqu 0-128(%rcx),%xmm15
        vaesenclast     %xmm6,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm5,%xmm6
        vaesenclast     %xmm7,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm6,%xmm7
        vaesenclast     %xmm3,%xmm14,%xmm14
        vpaddb  %xmm2,%xmm7,%xmm3

        addq    $0x60,%r10
        subq    $0x6,%rdx
        jc      .L6x_done

        vmovups %xmm9,-96(%rsi)
        vpxor   %xmm15,%xmm1,%xmm9
        vmovups %xmm10,-80(%rsi)
        vmovdqa %xmm0,%xmm10
        vmovups %xmm11,-64(%rsi)
        vmovdqa %xmm5,%xmm11
        vmovups %xmm12,-48(%rsi)
        vmovdqa %xmm6,%xmm12
        vmovups %xmm13,-32(%rsi)
        vmovdqa %xmm7,%xmm13
        vmovups %xmm14,-16(%rsi)
        vmovdqa %xmm3,%xmm14
        vmovdqu 32+8(%rsp),%xmm7
        jmp     .Loop6x

.L6x_done:
        vpxor   16+8(%rsp),%xmm8,%xmm8
        vpxor   %xmm4,%xmm8,%xmm8

        .byte   0xf3,0xc3
.size   _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
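# aesni_gcm_decrypt: SysV arguments are %rdi = input, %rsi = output,
# %rdx = length, %rcx = expanded AES key, %r8 = counter/IV block,
# %r9 = GHASH state Xi, with the precomputed hash-key table following it.
# Lengths below 0x60 (96) bytes return 0; otherwise a multiple of 96 bytes is
# decrypted and the number of bytes processed is returned in %rax.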
.globl  aesni_gcm_decrypt
.type   aesni_gcm_decrypt,@function
.align  32
aesni_gcm_decrypt:
        xorq    %r10,%r10
        cmpq    $0x60,%rdx
        jb      .Lgcm_dec_abort

        leaq    (%rsp),%rax
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        vmovdqu (%r9),%xmm8
        andq    $-128,%rsp
        vmovdqu (%r11),%xmm0
        leaq    128(%rcx),%rcx
        leaq    32+32(%r9),%r9
        movl    240-128(%rcx),%ebp
        vpshufb %xmm0,%xmm8,%xmm8

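# Stack/key anti-aliasing: if the stack scratch area and the key schedule
# fall less than 768 bytes apart within the same 0xf80-aligned window, lower
# %rsp a bit further.  This appears to be meant to keep the saved GHASH
# inputs and the round keys out of the same cache sets.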
        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Ldec_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Ldec_no_key_aliasing
        subq    %r15,%rsp
.Ldec_no_key_aliasing:

        vmovdqu 80(%rdi),%xmm7
        leaq    (%rdi),%r14
        vmovdqu 64(%rdi),%xmm4
        leaq    -192(%rdi,%rdx,1),%r15
        vmovdqu 48(%rdi),%xmm5
        shrq    $4,%rdx
        xorq    %r10,%r10
        vmovdqu 32(%rdi),%xmm6
        vpshufb %xmm0,%xmm7,%xmm7
        vmovdqu 16(%rdi),%xmm2
        vpshufb %xmm0,%xmm4,%xmm4
        vmovdqu (%rdi),%xmm3
        vpshufb %xmm0,%xmm5,%xmm5
        vmovdqu %xmm4,48(%rsp)
        vpshufb %xmm0,%xmm6,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm2,%xmm2
        vmovdqu %xmm6,80(%rsp)
        vpshufb %xmm0,%xmm3,%xmm3
        vmovdqu %xmm2,96(%rsp)
        vmovdqu %xmm3,112(%rsp)

        call    _aesni_ctr32_ghash_6x

        vmovups %xmm9,-96(%rsi)
        vmovups %xmm10,-80(%rsi)
        vmovups %xmm11,-64(%rsi)
        vmovups %xmm12,-48(%rsi)
        vmovups %xmm13,-32(%rsi)
        vmovups %xmm14,-16(%rsi)

        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
        movq    -40(%rax),%r14
        movq    -32(%rax),%r13
        movq    -24(%rax),%r12
        movq    -16(%rax),%rbp
        movq    -8(%rax),%rbx
        leaq    (%rax),%rsp
.Lgcm_dec_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.size   aesni_gcm_decrypt,.-aesni_gcm_decrypt
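# _aesni_ctr32_6x: plain (non-stitched) CTR encryption of six blocks, xored
# against 96 bytes at %rdi and written to %rsi.  aesni_gcm_encrypt calls it
# twice to prime the pipeline before entering the stitched loop.
# .Lhandle_ctr32_2 is the same counter wrap-around fix-up as .Lhandle_ctr32.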
.type   _aesni_ctr32_6x,@function
.align  32
_aesni_ctr32_6x:
        vmovdqu 0-128(%rcx),%xmm4
        vmovdqu 32(%r11),%xmm2
        leaq    -1(%rbp),%r13
        vmovups 16-128(%rcx),%xmm15
        leaq    32-128(%rcx),%r12
        vpxor   %xmm4,%xmm1,%xmm9
        addl    $100663296,%ebx
        jc      .Lhandle_ctr32_2
        vpaddb  %xmm2,%xmm1,%xmm10
        vpaddb  %xmm2,%xmm10,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddb  %xmm2,%xmm11,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddb  %xmm2,%xmm12,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpaddb  %xmm2,%xmm13,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpaddb  %xmm2,%xmm14,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32

.align  16
.Loop_ctr32:
        vaesenc %xmm15,%xmm9,%xmm9
        vaesenc %xmm15,%xmm10,%xmm10
        vaesenc %xmm15,%xmm11,%xmm11
        vaesenc %xmm15,%xmm12,%xmm12
        vaesenc %xmm15,%xmm13,%xmm13
        vaesenc %xmm15,%xmm14,%xmm14
        vmovups (%r12),%xmm15
        leaq    16(%r12),%r12
        decl    %r13d
        jnz     .Loop_ctr32

        vmovdqu (%r12),%xmm3
        vaesenc %xmm15,%xmm9,%xmm9
        vpxor   0(%rdi),%xmm3,%xmm4
        vaesenc %xmm15,%xmm10,%xmm10
        vpxor   16(%rdi),%xmm3,%xmm5
        vaesenc %xmm15,%xmm11,%xmm11
        vpxor   32(%rdi),%xmm3,%xmm6
        vaesenc %xmm15,%xmm12,%xmm12
        vpxor   48(%rdi),%xmm3,%xmm8
        vaesenc %xmm15,%xmm13,%xmm13
        vpxor   64(%rdi),%xmm3,%xmm2
        vaesenc %xmm15,%xmm14,%xmm14
        vpxor   80(%rdi),%xmm3,%xmm3
        leaq    96(%rdi),%rdi

        vaesenclast     %xmm4,%xmm9,%xmm9
        vaesenclast     %xmm5,%xmm10,%xmm10
        vaesenclast     %xmm6,%xmm11,%xmm11
        vaesenclast     %xmm8,%xmm12,%xmm12
        vaesenclast     %xmm2,%xmm13,%xmm13
        vaesenclast     %xmm3,%xmm14,%xmm14
        vmovups %xmm9,0(%rsi)
        vmovups %xmm10,16(%rsi)
        vmovups %xmm11,32(%rsi)
        vmovups %xmm12,48(%rsi)
        vmovups %xmm13,64(%rsi)
        vmovups %xmm14,80(%rsi)
        leaq    96(%rsi),%rsi

        .byte   0xf3,0xc3
.align  32
.Lhandle_ctr32_2:
        vpshufb %xmm0,%xmm1,%xmm6
        vmovdqu 48(%r11),%xmm5
        vpaddd  64(%r11),%xmm6,%xmm10
        vpaddd  %xmm5,%xmm6,%xmm11
        vpaddd  %xmm5,%xmm10,%xmm12
        vpshufb %xmm0,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm11,%xmm13
        vpshufb %xmm0,%xmm11,%xmm11
        vpxor   %xmm4,%xmm10,%xmm10
        vpaddd  %xmm5,%xmm12,%xmm14
        vpshufb %xmm0,%xmm12,%xmm12
        vpxor   %xmm4,%xmm11,%xmm11
        vpaddd  %xmm5,%xmm13,%xmm1
        vpshufb %xmm0,%xmm13,%xmm13
        vpxor   %xmm4,%xmm12,%xmm12
        vpshufb %xmm0,%xmm14,%xmm14
        vpxor   %xmm4,%xmm13,%xmm13
        vpshufb %xmm0,%xmm1,%xmm1
        vpxor   %xmm4,%xmm14,%xmm14
        jmp     .Loop_ctr32
.size   _aesni_ctr32_6x,.-_aesni_ctr32_6x

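# aesni_gcm_encrypt: same register arguments as aesni_gcm_decrypt, but the
# minimum length is 288 bytes (two priming CTR passes of 96 bytes plus one
# stitched pass).  The stitched loop hashes ciphertext lagging six blocks
# behind the encryption; the remaining twelve ciphertext blocks are hashed
# after the loop.  Returns the number of bytes processed in %rax.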
.globl  aesni_gcm_encrypt
.type   aesni_gcm_encrypt,@function
.align  32
aesni_gcm_encrypt:
        xorq    %r10,%r10
        cmpq    $288,%rdx
        jb      .Lgcm_enc_abort

        leaq    (%rsp),%rax
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        vzeroupper

        vmovdqu (%r8),%xmm1
        addq    $-128,%rsp
        movl    12(%r8),%ebx
        leaq    .Lbswap_mask(%rip),%r11
        leaq    -128(%rcx),%r14
        movq    $0xf80,%r15
        leaq    128(%rcx),%rcx
        vmovdqu (%r11),%xmm0
        andq    $-128,%rsp
        movl    240-128(%rcx),%ebp

        andq    %r15,%r14
        andq    %rsp,%r15
        subq    %r14,%r15
        jc      .Lenc_no_key_aliasing
        cmpq    $768,%r15
        jnc     .Lenc_no_key_aliasing
        subq    %r15,%rsp
.Lenc_no_key_aliasing:

        leaq    (%rsi),%r14
        leaq    -192(%rsi,%rdx,1),%r15
        shrq    $4,%rdx

        call    _aesni_ctr32_6x
        vpshufb %xmm0,%xmm9,%xmm8
        vpshufb %xmm0,%xmm10,%xmm2
        vmovdqu %xmm8,112(%rsp)
        vpshufb %xmm0,%xmm11,%xmm4
        vmovdqu %xmm2,96(%rsp)
        vpshufb %xmm0,%xmm12,%xmm5
        vmovdqu %xmm4,80(%rsp)
        vpshufb %xmm0,%xmm13,%xmm6
        vmovdqu %xmm5,64(%rsp)
        vpshufb %xmm0,%xmm14,%xmm7
        vmovdqu %xmm6,48(%rsp)

        call    _aesni_ctr32_6x

        vmovdqu (%r9),%xmm8
        leaq    32+32(%r9),%r9
        subq    $12,%rdx
        movq    $192,%r10
        vpshufb %xmm0,%xmm8,%xmm8

        call    _aesni_ctr32_ghash_6x
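# The stitched loop leaves the last twelve ciphertext blocks unhashed: six
# saved on the stack and six still in %xmm9-%xmm14.  Fold them into Xi with
# Karatsuba-style vpclmulqdq multiplies, then do the final reduction modulo
# the GHASH polynomial (.Lpoly) and store Xi back.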
        vmovdqu 32(%rsp),%xmm7
        vmovdqu (%r11),%xmm0
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm7,%xmm7,%xmm1
        vmovdqu 32-32(%r9),%xmm15
        vmovups %xmm9,-96(%rsi)
        vpshufb %xmm0,%xmm9,%xmm9
        vpxor   %xmm7,%xmm1,%xmm1
        vmovups %xmm10,-80(%rsi)
        vpshufb %xmm0,%xmm10,%xmm10
        vmovups %xmm11,-64(%rsi)
        vpshufb %xmm0,%xmm11,%xmm11
        vmovups %xmm12,-48(%rsi)
        vpshufb %xmm0,%xmm12,%xmm12
        vmovups %xmm13,-32(%rsi)
        vpshufb %xmm0,%xmm13,%xmm13
        vmovups %xmm14,-16(%rsi)
        vpshufb %xmm0,%xmm14,%xmm14
        vmovdqu %xmm9,16(%rsp)
        vmovdqu 48(%rsp),%xmm6
        vmovdqu 16-32(%r9),%xmm0
        vpunpckhqdq     %xmm6,%xmm6,%xmm2
        vpclmulqdq      $0x00,%xmm3,%xmm7,%xmm5
        vpxor   %xmm6,%xmm2,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1

        vmovdqu 64(%rsp),%xmm9
        vpclmulqdq      $0x00,%xmm0,%xmm6,%xmm4
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm9,%xmm9,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm6,%xmm6
        vpxor   %xmm9,%xmm5,%xmm5
        vpxor   %xmm7,%xmm6,%xmm6
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vmovdqu 80(%rsp),%xmm1
        vpclmulqdq      $0x00,%xmm3,%xmm9,%xmm7
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm4,%xmm7,%xmm7
        vpunpckhqdq     %xmm1,%xmm1,%xmm4
        vpclmulqdq      $0x11,%xmm3,%xmm9,%xmm9
        vpxor   %xmm1,%xmm4,%xmm4
        vpxor   %xmm6,%xmm9,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm5,%xmm5
        vpxor   %xmm2,%xmm5,%xmm5

        vmovdqu 96(%rsp),%xmm2
        vpclmulqdq      $0x00,%xmm0,%xmm1,%xmm6
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm7,%xmm6,%xmm6
        vpunpckhqdq     %xmm2,%xmm2,%xmm7
        vpclmulqdq      $0x11,%xmm0,%xmm1,%xmm1
        vpxor   %xmm2,%xmm7,%xmm7
        vpxor   %xmm9,%xmm1,%xmm1
        vpclmulqdq      $0x10,%xmm15,%xmm4,%xmm4
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm5,%xmm4,%xmm4

        vpxor   112(%rsp),%xmm8,%xmm8
        vpclmulqdq      $0x00,%xmm3,%xmm2,%xmm5
        vmovdqu 112-32(%r9),%xmm0
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpxor   %xmm6,%xmm5,%xmm5
        vpclmulqdq      $0x11,%xmm3,%xmm2,%xmm2
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm1,%xmm2,%xmm2
        vpclmulqdq      $0x00,%xmm15,%xmm7,%xmm7
        vpxor   %xmm4,%xmm7,%xmm4

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm6
        vmovdqu 0-32(%r9),%xmm3
        vpunpckhqdq     %xmm14,%xmm14,%xmm1
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm8
        vpxor   %xmm14,%xmm1,%xmm1
        vpxor   %xmm5,%xmm6,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm9
        vmovdqu 32-32(%r9),%xmm15
        vpxor   %xmm2,%xmm8,%xmm7
        vpxor   %xmm4,%xmm9,%xmm6

        vmovdqu 16-32(%r9),%xmm0
        vpxor   %xmm5,%xmm7,%xmm9
        vpclmulqdq      $0x00,%xmm3,%xmm14,%xmm4
        vpxor   %xmm9,%xmm6,%xmm6
        vpunpckhqdq     %xmm13,%xmm13,%xmm2
        vpclmulqdq      $0x11,%xmm3,%xmm14,%xmm14
        vpxor   %xmm13,%xmm2,%xmm2
        vpslldq $8,%xmm6,%xmm9
        vpclmulqdq      $0x00,%xmm15,%xmm1,%xmm1
        vpxor   %xmm9,%xmm5,%xmm8
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm6,%xmm7,%xmm7

        vpclmulqdq      $0x00,%xmm0,%xmm13,%xmm5
        vmovdqu 48-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm12,%xmm12,%xmm9
        vpclmulqdq      $0x11,%xmm0,%xmm13,%xmm13
        vpxor   %xmm12,%xmm9,%xmm9
        vpxor   %xmm14,%xmm13,%xmm13
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpclmulqdq      $0x10,%xmm15,%xmm2,%xmm2
        vmovdqu 80-32(%r9),%xmm15
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm3,%xmm12,%xmm4
        vmovdqu 64-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm11,%xmm11,%xmm1
        vpclmulqdq      $0x11,%xmm3,%xmm12,%xmm12
        vpxor   %xmm11,%xmm1,%xmm1
        vpxor   %xmm13,%xmm12,%xmm12
        vxorps  16(%rsp),%xmm7,%xmm7
        vpclmulqdq      $0x00,%xmm15,%xmm9,%xmm9
        vpxor   %xmm2,%xmm9,%xmm9

        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm0,%xmm11,%xmm5
        vmovdqu 96-32(%r9),%xmm3
        vpxor   %xmm4,%xmm5,%xmm5
        vpunpckhqdq     %xmm10,%xmm10,%xmm2
        vpclmulqdq      $0x11,%xmm0,%xmm11,%xmm11
        vpxor   %xmm10,%xmm2,%xmm2
        vpalignr        $8,%xmm8,%xmm8,%xmm14
        vpxor   %xmm12,%xmm11,%xmm11
        vpclmulqdq      $0x10,%xmm15,%xmm1,%xmm1
        vmovdqu 128-32(%r9),%xmm15
        vpxor   %xmm9,%xmm1,%xmm1

        vxorps  %xmm7,%xmm14,%xmm14
        vpclmulqdq      $0x10,16(%r11),%xmm8,%xmm8
        vxorps  %xmm14,%xmm8,%xmm8

        vpclmulqdq      $0x00,%xmm3,%xmm10,%xmm4
        vmovdqu 112-32(%r9),%xmm0
        vpxor   %xmm5,%xmm4,%xmm4
        vpunpckhqdq     %xmm8,%xmm8,%xmm9
        vpclmulqdq      $0x11,%xmm3,%xmm10,%xmm10
        vpxor   %xmm8,%xmm9,%xmm9
        vpxor   %xmm11,%xmm10,%xmm10
        vpclmulqdq      $0x00,%xmm15,%xmm2,%xmm2
        vpxor   %xmm1,%xmm2,%xmm2

        vpclmulqdq      $0x00,%xmm0,%xmm8,%xmm5
        vpclmulqdq      $0x11,%xmm0,%xmm8,%xmm7
        vpxor   %xmm4,%xmm5,%xmm5
        vpclmulqdq      $0x10,%xmm15,%xmm9,%xmm6
        vpxor   %xmm10,%xmm7,%xmm7
        vpxor   %xmm2,%xmm6,%xmm6

        vpxor   %xmm5,%xmm7,%xmm4
        vpxor   %xmm4,%xmm6,%xmm6
        vpslldq $8,%xmm6,%xmm1
        vmovdqu 16(%r11),%xmm3
        vpsrldq $8,%xmm6,%xmm6
        vpxor   %xmm1,%xmm5,%xmm8
        vpxor   %xmm6,%xmm7,%xmm7

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm2,%xmm8,%xmm8

        vpalignr        $8,%xmm8,%xmm8,%xmm2
        vpclmulqdq      $0x10,%xmm3,%xmm8,%xmm8
        vpxor   %xmm7,%xmm2,%xmm2
        vpxor   %xmm2,%xmm8,%xmm8
        vpshufb (%r11),%xmm8,%xmm8
        vmovdqu %xmm8,-64(%r9)

        vzeroupper
        movq    -48(%rax),%r15
        movq    -40(%rax),%r14
        movq    -32(%rax),%r13
        movq    -24(%rax),%r12
        movq    -16(%rax),%rbp
        movq    -8(%rax),%rbx
        leaq    (%rax),%rsp
.Lgcm_enc_abort:
        movq    %r10,%rax
        .byte   0xf3,0xc3
.size   aesni_gcm_encrypt,.-aesni_gcm_encrypt
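# Constant pool: .Lbswap_mask is the endianness shuffle mask for vpshufb,
# .Lpoly holds the GHASH reduction constant (0xc2 in the top byte), and
# .Lone_msb/.Ltwo_lsb/.Lone_lsb are the counter increments used above.
# The final .byte string is the CRYPTOGAMS identification banner.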
.align  64
.Lbswap_mask:
.byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte   2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte   65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  64
.section .note.GNU-stack,"",%progbits