Merge branch 'vendor/GCC50'
[dragonfly.git] / sys / dev / crypto / aesni / aeskeys_i386.S
1 /*-
2 * The white paper of AES-NI instructions can be downloaded from:
3  *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
4  *
5  * Copyright (C) 2008-2010, Intel Corporation
6  *    Author: Huang Ying <ying.huang@intel.com>
7  *            Vinodh Gopal <vinodh.gopal@intel.com>
8  *            Kahraman Akdemir
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following
12  * conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright
15  *   notice, this list of conditions and the following disclaimer.
16  *
17  * - Redistributions in binary form must reproduce the above copyright
18  *   notice, this list of conditions and the following disclaimer in the
19  *   documentation and/or other materials provided with the
20  *   distribution.
21  *
22  * - Neither the name of Intel Corporation nor the names of its
23  *   contributors may be used to endorse or promote products
24  *   derived from this software without specific prior written
25  *   permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
31  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * $FreeBSD: src/sys/crypto/aesni/aeskeys_i386.S,v 1.3 2011/03/02 14:56:58 kib Exp $
40  */
41
42 #include <machine/asmacros.h>
43
44         .text
45
/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * Internal helper, reached with `call` from aesni_set_enckey.  It uses
 * a private register convention rather than the C calling convention:
 *
 *   In:   %xmm0 = most recent round key
 *         %xmm1 = aeskeygenassist result (dword 3 is consumed)
 *         %xmm4 = dword 0 must be zero
 *         %edx  = where to store the new round key (movaps => the
 *                 address must be 16-byte aligned)
 *   Out:  %xmm0 = new round key, also written to (%edx)
 *         %edx  = advanced by 16 bytes
 *         %xmm4 = dword 0 is still zero, the rest is scratch
 *   Clobbers: %xmm1, %xmm4, flags
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
        .cfi_startproc
        /*
         * Turn %xmm0 into the running XOR of its own dwords:
         * (a, b, c, d) -> (a, a^b, a^b^c, a^b^c^d).  Both shuffles keep
         * dword 0 of %xmm4 in place, so it remains zero for the next call.
         */
        shufps  $0x10,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        shufps  $0x8c,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        /* Replicate dword 3 of the assist value and fold it into every lane. */
        pshufd  $0xff,%xmm1,%xmm1
        pxor    %xmm1,%xmm0
        /* Emit the round key and step the output cursor. */
        movaps  %xmm0,(%edx)
        addl    $0x10,%edx
        retl
        .cfi_endproc
END(_key_expansion_128)
60
/*
 * _key_expansion_192a
 *
 * Internal helper, reached with `call` from the .Lenc_key192 path of
 * aesni_set_enckey.  Private register convention:
 *
 *   In:   %xmm0 = four most recent key dwords
 *         %xmm2 = two further key dwords in its low 64 bits
 *         %xmm1 = aeskeygenassist result (dword 1 is consumed)
 *         %xmm4 = dword 0 must be zero
 *         %edx  = output cursor (movaps => 16-byte aligned)
 *   Out:  32 bytes written at (%edx) and 0x10(%edx)
 *         %xmm0, %xmm2 = updated key state
 *         %edx  = advanced by 32 bytes
 *   Clobbers: %xmm1, %xmm3, %xmm4 (dword 0 stays zero), %xmm5, %xmm6,
 *             flags
 */
ENTRY(_key_expansion_192a)
        .cfi_startproc
        /* Keep the incoming %xmm2: one plain copy, one shifted up a dword. */
        movaps  %xmm2,%xmm6
        movaps  %xmm2,%xmm5
        pslldq  $4,%xmm5

        /* %xmm0 <- running XOR of its own dwords (needs %xmm4 dword 0 == 0). */
        shufps  $0x10,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        shufps  $0x8c,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        /* Fold in dword 1 of the assist value, replicated to all lanes. */
        pshufd  $0x55,%xmm1,%xmm1
        pxor    %xmm1,%xmm0

        /* %xmm2 ^= broadcast(dword 3 of new %xmm0) ^ (old %xmm2 << 32). */
        pshufd  $0xff,%xmm0,%xmm3
        pxor    %xmm3,%xmm2
        pxor    %xmm5,%xmm2

        /* First store: low half of old %xmm2, then low half of new %xmm0. */
        shufps  $0x44,%xmm0,%xmm6
        movaps  %xmm6,(%edx)
        /* Second store: high half of new %xmm0, then low half of new %xmm2. */
        movaps  %xmm0,%xmm1
        shufps  $0x4e,%xmm2,%xmm1
        movaps  %xmm1,0x10(%edx)
        addl    $0x20,%edx
        retl
        .cfi_endproc
END(_key_expansion_192a)
84
/*
 * _key_expansion_192b
 *
 * Internal helper, reached with `call` from the .Lenc_key192 path of
 * aesni_set_enckey.  Private register convention:
 *
 *   In:   %xmm0 = four most recent key dwords
 *         %xmm2 = two further key dwords in its low 64 bits
 *         %xmm1 = aeskeygenassist result (dword 1 is consumed)
 *         %xmm4 = dword 0 must be zero
 *         %edx  = output cursor (movaps => 16-byte aligned)
 *   Out:  16 bytes (the new %xmm0) written at (%edx)
 *         %xmm0, %xmm2 = updated key state
 *         %edx  = advanced by 16 bytes
 *   Clobbers: %xmm1, %xmm3, %xmm4 (dword 0 stays zero), %xmm5, flags
 */
ENTRY(_key_expansion_192b)
        .cfi_startproc
        /* Shifted copy of the incoming %xmm2, needed after %xmm0 is updated. */
        movaps  %xmm2,%xmm5
        pslldq  $4,%xmm5

        /* %xmm0 <- running XOR of its own dwords (needs %xmm4 dword 0 == 0). */
        shufps  $0x10,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        shufps  $0x8c,%xmm0,%xmm4
        pxor    %xmm4,%xmm0
        /* Fold in dword 1 of the assist value, replicated to all lanes. */
        pshufd  $0x55,%xmm1,%xmm1
        pxor    %xmm1,%xmm0

        /* %xmm2 ^= broadcast(dword 3 of new %xmm0) ^ (old %xmm2 << 32). */
        pshufd  $0xff,%xmm0,%xmm3
        pxor    %xmm3,%xmm2
        pxor    %xmm5,%xmm2

        /* Only %xmm0 is stored by this variant. */
        movaps  %xmm0,(%edx)
        addl    $0x10,%edx
        retl
        .cfi_endproc
END(_key_expansion_192b)
103
/*
 * _key_expansion_256b
 *
 * Internal helper, reached with `call` from the 256-bit path of
 * aesni_set_enckey.  Same shape as _key_expansion_256a, but it updates
 * %xmm2 instead of %xmm0 and consumes dword 2 of the assist value.
 *
 *   In:   %xmm2 = key state to update
 *         %xmm1 = aeskeygenassist result (dword 2 is consumed)
 *         %xmm4 = dword 0 must be zero
 *         %edx  = output cursor (movaps => 16-byte aligned)
 *   Out:  %xmm2 = new round key, also written to (%edx)
 *         %edx  = advanced by 16 bytes
 *   Clobbers: %xmm1, %xmm4 (dword 0 stays zero), flags
 */
ENTRY(_key_expansion_256b)
        .cfi_startproc
        /* %xmm2 <- running XOR of its own dwords (needs %xmm4 dword 0 == 0). */
        shufps  $0x10,%xmm2,%xmm4
        pxor    %xmm4,%xmm2
        shufps  $0x8c,%xmm2,%xmm4
        pxor    %xmm4,%xmm2
        /* Replicate dword 2 of the assist value and fold it into every lane. */
        pshufd  $0xaa,%xmm1,%xmm1
        pxor    %xmm1,%xmm2
        /* Emit the round key and step the output cursor. */
        movaps  %xmm2,(%edx)
        addl    $0x10,%edx
        retl
        .cfi_endproc
END(_key_expansion_256b)
117
/*
 * aesni_set_enckey
 *
 * Expands a user-supplied AES key into the encryption key schedule.
 *
 * Stack arguments (i386 cdecl, read relative to %ebp after the prologue):
 *    8(%ebp)  pointer to the user key; read with movups/movq, so no
 *             alignment is required
 *   12(%ebp)  pointer to the output schedule; written with movaps, so it
 *             must be 16-byte aligned
 *   16(%ebp)  number of rounds: below 12 takes the 128-bit path, exactly
 *             12 the 192-bit path, anything above the 256-bit path
 *
 * Output size: 11, 13 or 15 sixteen-byte round keys for the 128-, 192-
 * and 256-bit paths respectively.
 *
 * Register use: %ecx = user key, %edx = output cursor (advanced by the
 * _key_expansion_* helpers), %xmm0/%xmm2 = rolling key state,
 * %xmm1 = aeskeygenassist result, %xmm4 = zeroed for the helpers.
 * The helpers additionally clobber %xmm3, %xmm5 and %xmm6.
 *
 * The AES-NI instructions are emitted as raw .byte sequences; the
 * mnemonic each one encodes is given in the comment above it.
 *
 * CFI: the function has three exits.  Each of the first two epilogues
 * is bracketed by .cfi_remember_state/.cfi_restore_state so that its
 * CFA adjustment does not carry over into the code that follows the
 * return instruction.
 */
ENTRY(aesni_set_enckey)
        .cfi_startproc
        pushl   %ebp
        .cfi_adjust_cfa_offset 4
        movl    %esp,%ebp
        movl    8(%ebp),%ecx
        movl    12(%ebp),%edx
        movups  (%ecx),%xmm0            /* user key (first 16 bytes) */
        movaps  %xmm0,(%edx)
        addl    $0x10,%edx              /* key addr */
        pxor    %xmm4,%xmm4             /* xmm4 is assumed 0 in _key_expansion_x */
        cmpl    $12,16(%ebp)            /* rounds */
        jb      .Lenc_key128
        je      .Lenc_key192

        /* 256-bit key: the second 16 bytes of the user key are stored as is. */
        movups  0x10(%ecx),%xmm2        /* other user key */
        movaps  %xmm2,(%edx)
        addl    $0x10,%edx
        /* aeskeygenassist $0x1,%xmm2,%xmm1 -- round 1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x01
        call    _key_expansion_256a
        /* aeskeygenassist $0x1,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x01
        call    _key_expansion_256b
        /* aeskeygenassist $0x2,%xmm2,%xmm1 -- round 2 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x02
        call    _key_expansion_256a
        /* aeskeygenassist $0x2,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x02
        call    _key_expansion_256b
        /* aeskeygenassist $0x4,%xmm2,%xmm1 -- round 3 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x04
        call    _key_expansion_256a
        /* aeskeygenassist $0x4,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x04
        call    _key_expansion_256b
        /* aeskeygenassist $0x8,%xmm2,%xmm1 -- round 4 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x08
        call    _key_expansion_256a
        /* aeskeygenassist $0x8,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x08
        call    _key_expansion_256b
        /* aeskeygenassist $0x10,%xmm2,%xmm1 -- round 5 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x10
        call    _key_expansion_256a
        /* aeskeygenassist $0x10,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x10
        call    _key_expansion_256b
        /* aeskeygenassist $0x20,%xmm2,%xmm1 -- round 6 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x20
        call    _key_expansion_256a
        /* aeskeygenassist $0x20,%xmm0,%xmm1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x20
        call    _key_expansion_256b
        /* aeskeygenassist $0x40,%xmm2,%xmm1 -- round 7 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x40
        call    _key_expansion_256a
        .cfi_remember_state             /* frame is still live below this exit */
        leave
        .cfi_adjust_cfa_offset -4
        retl
        .cfi_restore_state

.Lenc_key192:
        movq    0x10(%ecx),%xmm2        /* other user key (8 bytes) */
        /* aeskeygenassist $0x1,%xmm2,%xmm1 -- round 1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x01
        call    _key_expansion_192a
        /* aeskeygenassist $0x2,%xmm2,%xmm1 -- round 2 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x02
        call    _key_expansion_192b
        /* aeskeygenassist $0x4,%xmm2,%xmm1 -- round 3 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x04
        call    _key_expansion_192a
        /* aeskeygenassist $0x8,%xmm2,%xmm1 -- round 4 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x08
        call    _key_expansion_192b
        /* aeskeygenassist $0x10,%xmm2,%xmm1 -- round 5 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x10
        call    _key_expansion_192a
        /* aeskeygenassist $0x20,%xmm2,%xmm1 -- round 6 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x20
        call    _key_expansion_192b
        /* aeskeygenassist $0x40,%xmm2,%xmm1 -- round 7 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x40
        call    _key_expansion_192a
        /* aeskeygenassist $0x80,%xmm2,%xmm1 -- round 8 */
        .byte   0x66,0x0f,0x3a,0xdf,0xca,0x80
        call    _key_expansion_192b
        .cfi_remember_state             /* frame is still live below this exit */
        leave
        .cfi_adjust_cfa_offset -4
        retl
        .cfi_restore_state

.Lenc_key128:
        /* aeskeygenassist $0x1,%xmm0,%xmm1 -- round 1 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x01
        call    _key_expansion_128
        /* aeskeygenassist $0x2,%xmm0,%xmm1 -- round 2 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x02
        call    _key_expansion_128
        /* aeskeygenassist $0x4,%xmm0,%xmm1 -- round 3 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x04
        call    _key_expansion_128
        /* aeskeygenassist $0x8,%xmm0,%xmm1 -- round 4 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x08
        call    _key_expansion_128
        /* aeskeygenassist $0x10,%xmm0,%xmm1 -- round 5 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x10
        call    _key_expansion_128
        /* aeskeygenassist $0x20,%xmm0,%xmm1 -- round 6 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x20
        call    _key_expansion_128
        /* aeskeygenassist $0x40,%xmm0,%xmm1 -- round 7 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x40
        call    _key_expansion_128
        /* aeskeygenassist $0x80,%xmm0,%xmm1 -- round 8 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x80
        call    _key_expansion_128
        /* aeskeygenassist $0x1b,%xmm0,%xmm1 -- round 9 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x1b
        call    _key_expansion_128
        /* aeskeygenassist $0x36,%xmm0,%xmm1 -- round 10 */
        .byte   0x66,0x0f,0x3a,0xdf,0xc8,0x36
        call    _key_expansion_128
        leave
        .cfi_adjust_cfa_offset -4
        retl
        .cfi_endproc
END(aesni_set_enckey)
242
/*
 * aesni_set_deckey
 *
 * Builds the decryption key schedule from an encryption key schedule.
 *
 * Stack arguments (i386 cdecl, read relative to %ebp after the prologue):
 *    8(%ebp)  pointer to the encryption schedule (source)
 *   12(%ebp)  pointer to the decryption schedule (destination)
 *   16(%ebp)  number of rounds
 *
 * The source is walked from its last 16-byte entry (index `rounds`)
 * down to entry 0 while the destination is filled upwards.  The first
 * and last entries are copied verbatim; the rounds-1 entries in between
 * pass through aesimc.  All accesses use movdqa or an SSE memory
 * operand, so both schedules must be 16-byte aligned.
 *
 * Register use: %eax = remaining middle entries, %ecx = source cursor,
 * %edx = destination cursor, %xmm0/%xmm1 = scratch.
 * NOTE(review): the loop tests its counter only after the first
 * iteration, so it presumably relies on rounds >= 2 -- confirm against
 * the callers.
 */
ENTRY(aesni_set_deckey)
        .cfi_startproc
        pushl   %ebp
        .cfi_adjust_cfa_offset 4
        movl    %esp,%ebp
        movl    12(%ebp),%edx           /* destination cursor */
        movl    16(%ebp),%eax           /* rounds */
        movl    %eax,%ecx
        shll    $4,%ecx                 /* rounds * 16 bytes per entry */
        addl    8(%ebp),%ecx            /* -> last encryption entry */

        /* Last encryption entry becomes the first decryption entry. */
        movdqa  (%ecx),%xmm0
        movdqa  %xmm0,(%edx)
        decl    %eax                    /* rounds - 1 entries get aesimc */
.Ldeckey_middle:
        addl    $0x10,%edx
        subl    $0x10,%ecx
        /* aesimc (%ecx),%xmm1 */
        .byte   0x66,0x0f,0x38,0xdb,0x09
        movdqa  %xmm1,(%edx)
        decl    %eax
        jne     .Ldeckey_middle

        /* Encryption entry 0 is copied unchanged into the final slot. */
        addl    $0x10,%edx
        subl    $0x10,%ecx
        movdqa  (%ecx),%xmm0
        movdqa  %xmm0,(%edx)
        leave
        .cfi_adjust_cfa_offset -4
        retl
        .cfi_endproc
END(aesni_set_deckey)