#include "x86_arch.h"
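/*
 * GHASH for x86_64, ELF flavour.  This file appears to be the machine-
 * generated output of the CRYPTOGAMS/perlasm script ghash-x86_64.pl
 * (see the version string at the end of the file); edit the script, not
 * this output.  It provides the 4-bit table-driven routines
 * gcm_gmult_4bit and gcm_ghash_4bit plus the PCLMULQDQ-based
 * gcm_init_clmul, gcm_gmult_clmul and gcm_ghash_clmul.
 */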
.text	

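/*
 * gcm_gmult_4bit: one GF(2^128) multiplication of Xi by H using the
 * 4-bit table method.  Expected C prototype, as declared by the
 * companion gcm128.c:
 *	void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
 * so %rdi = Xi and %rsi = Htable.  .Loop1 processes Xi nibble by nibble
 * from the top byte down, folding reduction terms in from .Lrem_4bit.
 */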
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
.Lgmult_prologue:

	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	retq
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
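/*
 * gcm_ghash_4bit: fold len bytes at inp into Xi.  Expected C prototype:
 *	void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 *	                    const u8 *inp, size_t len);
 * %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len.  The long prologue
 * below pre-computes a 4-bit-shifted copy of Htable in scratch space on
 * the stack so that .Louter_loop can consume a byte per step against
 * the 8-bit reduction table .Lrem_8bit.
 */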
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14
	movq	%rcx,%r15
	subq	$-128,%rsi
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi
	movq	8(%rdi),%r8
	movq	0(%rdi),%r9
	addq	%r14,%r15
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	retq
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
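/*
 * gcm_init_clmul: set up key material for the PCLMULQDQ paths.
 * Expected C prototype:
 *	void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
 * %rdi = Htable, %rsi = H.  H is shifted left by one and conditionally
 * reduced by .L0x1c2_polynomial, then squared, so that the adjusted H
 * lands at (%rdi) and H^2 at 16(%rdi) for the two-blocks-per-iteration
 * ghash loop.
 */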
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
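/*
 * The .byte runs below hand-assemble PCLMULQDQ for assemblers that do
 * not know the instruction.  Decoded, the recurring trio is:
 *	102,15,58,68,194,0	pclmulqdq $0x00,%xmm2,%xmm0	(low halves)
 *	102,15,58,68,202,17	pclmulqdq $0x11,%xmm2,%xmm1	(high halves)
 *	102,15,58,68,220,0	pclmulqdq $0x00,%xmm4,%xmm3	(Karatsuba middle)
 * and 102,15,56,0,NNN encodes pshufb with an xmm operand.
 */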
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rdi)
	movdqu	%xmm0,16(%rdi)
	retq
.size	gcm_init_clmul,.-gcm_init_clmul
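/*
 * gcm_gmult_clmul: one GHASH multiplication via carry-less multiply.
 * Expected C prototype:
 *	void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
 * Xi is byte-reversed with .Lbswap_mask on load and store, and the
 * 256-bit product is reduced with the shift/xor sequence below instead
 * of a lookup table.
 */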
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	retq
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
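/*
 * gcm_ghash_clmul: bulk GHASH via carry-less multiply.  Expected C
 * prototype:
 *	void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
 *	                     const u8 *inp, size_t len);
 * .Lmod_loop consumes two 16-byte blocks per iteration using H and the
 * H^2 value that gcm_init_clmul left at 16(%rsi); .Leven_tail and
 * .Lodd_tail handle the remainder.
 */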
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197

	subq	$16,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm8





	movdqu	(%rdx),%xmm3
	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,220,0
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	jbe	.Leven_tail

.Lmod_loop:
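/*
 * REX-prefixed variants: 102,65,15,58,68,192,0 decodes to
 * pclmulqdq $0x00,%xmm8,%xmm0 and 102,65,15,58,68,200,17 to
 * pclmulqdq $0x11,%xmm8,%xmm1 (REX.B selects %xmm8, the H^2 power);
 * 102,69,15,58,68,202,0 further down is pclmulqdq $0x00,%xmm10,%xmm9.
 */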
.byte	102,65,15,58,68,192,0
.byte	102,65,15,58,68,200,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%rdx),%xmm3
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

.byte	102,15,58,68,250,17
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7
	pxor	%xmm10,%xmm6

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
.byte	102,65,15,58,68,192,0
.byte	102,65,15,58,68,200,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	retq
.LSEH_end_gcm_ghash_clmul:
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
.align	64
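/*
 * Constants: .Lbswap_mask feeds pshufb to reverse the 16-byte block
 * order; .L0x1c2_polynomial is the GHASH reduction constant (0xc2 in
 * the top byte, 1 in the bottom byte) used by gcm_init_clmul.
 */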
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align	64
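/*
 * Reduction lookup tables for the 4-bit routines: entry i carries the
 * polynomial correction for the bits shifted out of the low end,
 * pre-positioned in the top bits of a 64-bit word (entry 1 of
 * .Lrem_4bit, the .long pair 0,471859200, reads as the little-endian
 * qword 0x1C2 << 52).
 */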
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

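/* ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */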
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#if defined(HAVE_GNU_STACK)
.section .note.GNU-stack,"",%progbits
#endif