Import OpenSSL-1.0.0a.
[dragonfly.git] / crypto / openssl / crypto / aes / aes_core.c
1 /* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2 /**
3  * rijndael-alg-fst.c
4  *
5  * @version 3.0 (December 2000)
6  *
7  * Optimised ANSI C code for the Rijndael cipher (now AES)
8  *
9  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11  * @author Paulo Barreto <paulo.barreto@terra.com.br>
12  *
13  * This code is hereby placed in the public domain.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 /* Note: rewritten a little bit to provide error control and an OpenSSL-
29    compatible API */
30
31 #ifndef AES_DEBUG
32 # ifndef NDEBUG
33 #  define NDEBUG
34 # endif
35 #endif
36 #include <assert.h>
37
38 #include <stdlib.h>
39 #include <openssl/aes.h>
40 #include "aes_locl.h"
41
42 #ifndef AES_ASM
43 /*
44 Te0[x] = S [x].[02, 01, 01, 03];
45 Te1[x] = S [x].[03, 02, 01, 01];
46 Te2[x] = S [x].[01, 03, 02, 01];
47 Te3[x] = S [x].[01, 01, 03, 02];
48
49 Td0[x] = Si[x].[0e, 09, 0d, 0b];
50 Td1[x] = Si[x].[0b, 0e, 09, 0d];
51 Td2[x] = Si[x].[0d, 0b, 0e, 09];
52 Td3[x] = Si[x].[09, 0d, 0b, 0e];
53 Td4[x] = Si[x].[01];
54 */
55
/*
 * Te0: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Te0[x] = S[x].[02, 01, 01, 03].  The table is
 * file-local (static), read-only (const), and sits inside the
 * "#ifndef AES_ASM" section, so it is compiled only when AES_ASM is not
 * defined.
 */
56 static const u32 Te0[256] = {
57     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
58     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
59     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
60     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
61     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
62     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
63     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
64     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
65     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
66     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
67     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
68     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
69     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
70     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
71     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
72     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
73     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
74     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
75     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
76     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
77     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
78     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
79     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
80     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
81     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
82     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
83     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
84     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
85     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
86     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
87     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
88     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
89     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
90     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
91     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
92     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
93     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
94     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
95     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
96     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
97     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
98     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
99     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
100     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
101     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
102     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
103     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
104     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
105     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
106     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
107     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
108     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
109     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
110     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
111     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
112     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
113     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
114     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
115     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
116     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
117     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
118     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
119     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
120     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
121 };
/*
 * Te1: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Te1[x] = S[x].[03, 02, 01, 01].  The leading
 * entries equal the same-index Te0 entries rotated right by 8 bits
 * (e.g. index 0: 0xc66363a5 -> 0xa5c66363).
 */
122 static const u32 Te1[256] = {
123     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
124     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
125     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
126     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
127     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
128     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
129     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
130     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
131     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
132     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
133     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
134     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
135     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
136     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
137     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
138     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
139     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
140     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
141     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
142     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
143     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
144     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
145     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
146     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
147     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
148     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
149     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
150     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
151     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
152     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
153     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
154     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
155     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
156     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
157     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
158     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
159     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
160     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
161     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
162     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
163     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
164     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
165     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
166     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
167     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
168     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
169     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
170     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
171     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
172     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
173     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
174     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
175     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
176     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
177     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
178     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
179     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
180     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
181     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
182     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
183     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
184     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
185     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
186     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
187 };
/*
 * Te2: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Te2[x] = S[x].[01, 03, 02, 01].  The leading
 * entries equal the same-index Te0 entries rotated right by 16 bits
 * (e.g. index 0: 0xc66363a5 -> 0x63a5c663).
 */
188 static const u32 Te2[256] = {
189     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
190     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
191     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
192     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
193     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
194     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
195     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
196     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
197     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
198     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
199     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
200     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
201     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
202     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
203     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
204     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
205     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
206     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
207     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
208     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
209     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
210     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
211     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
212     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
213     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
214     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
215     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
216     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
217     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
218     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
219     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
220     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
221     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
222     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
223     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
224     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
225     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
226     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
227     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
228     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
229     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
230     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
231     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
232     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
233     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
234     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
235     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
236     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
237     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
238     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
239     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
240     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
241     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
242     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
243     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
244     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
245     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
246     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
247     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
248     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
249     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
250     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
251     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
252     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
253 };
/*
 * Te3: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Te3[x] = S[x].[01, 01, 03, 02].  The leading
 * entries equal the same-index Te0 entries rotated right by 24 bits
 * (e.g. index 0: 0xc66363a5 -> 0x6363a5c6).
 */
254 static const u32 Te3[256] = {
255     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
256     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
257     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
258     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
259     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
260     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
261     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
262     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
263     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
264     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
265     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
266     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
267     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
268     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
269     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
270     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
271     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
272     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
273     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
274     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
275     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
276     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
277     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
278     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
279     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
280     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
281     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
282     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
283     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
284     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
285     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
286     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
287     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
288     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
289     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
290     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
291     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
292     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
293     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
294     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
295     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
296     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
297     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
298     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
299     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
300     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
301     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
302     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
303     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
304     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
305     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
306     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
307     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
308     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
309     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
310     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
311     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
312     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
313     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
314     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
315     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
316     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
317     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
318     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
319 };
320
/*
 * Td0: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Td0[x] = Si[x].[0e, 09, 0d, 0b].  The table is
 * file-local (static) and read-only (const).
 */
321 static const u32 Td0[256] = {
322     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
323     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
324     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
325     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
326     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
327     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
328     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
329     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
330     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
331     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
332     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
333     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
334     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
335     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
336     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
337     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
338     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
339     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
340     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
341     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
342     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
343     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
344     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
345     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
346     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
347     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
348     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
349     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
350     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
351     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
352     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
353     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
354     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
355     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
356     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
357     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
358     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
359     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
360     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
361     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
362     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
363     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
364     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
365     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
366     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
367     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
368     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
369     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
370     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
371     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
372     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
373     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
374     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
375     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
376     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
377     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
378     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
379     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
380     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
381     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
382     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
383     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
384     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
385     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
386 };
/*
 * Td1: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Td1[x] = Si[x].[0b, 0e, 09, 0d].  The leading
 * entries equal the same-index Td0 entries rotated right by 8 bits
 * (e.g. index 0: 0x51f4a750 -> 0x5051f4a7).
 */
387 static const u32 Td1[256] = {
388     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
389     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
390     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
391     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
392     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
393     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
394     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
395     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
396     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
397     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
398     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
399     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
400     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
401     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
402     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
403     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
404     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
405     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
406     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
407     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
408     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
409     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
410     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
411     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
412     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
413     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
414     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
415     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
416     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
417     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
418     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
419     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
420     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
421     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
422     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
423     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
424     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
425     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
426     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
427     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
428     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
429     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
430     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
431     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
432     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
433     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
434     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
435     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
436     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
437     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
438     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
439     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
440     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
441     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
442     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
443     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
444     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
445     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
446     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
447     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
448     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
449     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
450     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
451     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
452 };
/*
 * Td2: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Td2[x] = Si[x].[0d, 0b, 0e, 09].  The leading
 * entries equal the same-index Td0 entries rotated right by 16 bits
 * (e.g. index 0: 0x51f4a750 -> 0xa75051f4).
 */
453 static const u32 Td2[256] = {
454     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
455     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
456     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
457     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
458     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
459     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
460     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
461     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
462     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
463     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
464     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
465     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
466     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
467     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
468     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
469     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
470     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
471     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
472     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
473     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
474     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
475     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
476     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
477     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
478     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
479     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
480     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
481     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
482     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
483     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
484     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
485     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
486     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
487     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
488     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
489     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
490     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
491     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
492     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
493     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
494     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
495     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
496     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
497     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
498     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
499     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
500     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
501     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
502     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
503     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
504     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
505     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
506     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
507     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
508     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
509     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
510     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
511     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
512     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
513     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
514     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
515     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
516     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
517     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
518 };
/*
 * Td3: 256-entry lookup table of 32-bit words.  Per the table summary
 * earlier in this file, Td3[x] = Si[x].[09, 0d, 0b, 0e].  The leading
 * entries equal the same-index Td0 entries rotated right by 24 bits
 * (e.g. index 0: 0x51f4a750 -> 0xf4a75051).
 */
519 static const u32 Td3[256] = {
520     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
521     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
522     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
523     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
524     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
525     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
526     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
527     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
528     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
529     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
530     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
531     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
532     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
533     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
534     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
535     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
536     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
537     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
538     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
539     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
540     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
541     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
542     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
543     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
544     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
545     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
546     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
547     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
548     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
549     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
550     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
551     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
552     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
553     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
554     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
555     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
556     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
557     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
558     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
559     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
560     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
561     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
562     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
563     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
564     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
565     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
566     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
567     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
568     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
569     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
570     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
571     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
572     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
573     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
574     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
575     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
576     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
577     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
578     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
579     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
580     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
581     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
582     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
583     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
584 };
/*
 * Td4: inverse S-box stored as single bytes (Td4[x] = Si[x].[01] in the
 * table summary at the top of this file).  AES_decrypt indexes it in the
 * final round and shifts each looked-up byte into its lane of the output word.
 */
585 static const u8 Td4[256] = {
586     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
587     0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
588     0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
589     0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
590     0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
591     0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
592     0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
593     0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
594     0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
595     0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
596     0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
597     0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
598     0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
599     0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
600     0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
601     0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
602     0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
603     0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
604     0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
605     0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
606     0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
607     0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
608     0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
609     0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
610     0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
611     0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
612     0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
613     0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
614     0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
615     0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
616     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
617     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
618 };
/*
 * Key-schedule round constants.  AES_set_encrypt_key XORs rcon[i] into the
 * first word produced by each expansion step, with i counting the steps.
 */
619 static const u32 rcon[] = {
620         0x01000000, 0x02000000, 0x04000000, 0x08000000,
621         0x10000000, 0x20000000, 0x40000000, 0x80000000,
622         0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
623 };
624
625 /**
626  * Expand the cipher key into the encryption key schedule.
627  */
628 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
629                         AES_KEY *key) {
630
631         u32 *rk;
632         int i = 0;
633         u32 temp;
634
635         if (!userKey || !key)
636                 return -1;
637         if (bits != 128 && bits != 192 && bits != 256)
638                 return -2;
639
640         rk = key->rd_key;
641
642         if (bits==128)
643                 key->rounds = 10;
644         else if (bits==192)
645                 key->rounds = 12;
646         else
647                 key->rounds = 14;
648
649         rk[0] = GETU32(userKey     );
650         rk[1] = GETU32(userKey +  4);
651         rk[2] = GETU32(userKey +  8);
652         rk[3] = GETU32(userKey + 12);
653         if (bits == 128) {
654                 while (1) {
655                         temp  = rk[3];
656                         rk[4] = rk[0] ^
657                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
658                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
659                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
660                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
661                                 rcon[i];
662                         rk[5] = rk[1] ^ rk[4];
663                         rk[6] = rk[2] ^ rk[5];
664                         rk[7] = rk[3] ^ rk[6];
665                         if (++i == 10) {
666                                 return 0;
667                         }
668                         rk += 4;
669                 }
670         }
671         rk[4] = GETU32(userKey + 16);
672         rk[5] = GETU32(userKey + 20);
673         if (bits == 192) {
674                 while (1) {
675                         temp = rk[ 5];
676                         rk[ 6] = rk[ 0] ^
677                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
678                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
679                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
680                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
681                                 rcon[i];
682                         rk[ 7] = rk[ 1] ^ rk[ 6];
683                         rk[ 8] = rk[ 2] ^ rk[ 7];
684                         rk[ 9] = rk[ 3] ^ rk[ 8];
685                         if (++i == 8) {
686                                 return 0;
687                         }
688                         rk[10] = rk[ 4] ^ rk[ 9];
689                         rk[11] = rk[ 5] ^ rk[10];
690                         rk += 6;
691                 }
692         }
693         rk[6] = GETU32(userKey + 24);
694         rk[7] = GETU32(userKey + 28);
695         if (bits == 256) {
696                 while (1) {
697                         temp = rk[ 7];
698                         rk[ 8] = rk[ 0] ^
699                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
700                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
701                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
702                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
703                                 rcon[i];
704                         rk[ 9] = rk[ 1] ^ rk[ 8];
705                         rk[10] = rk[ 2] ^ rk[ 9];
706                         rk[11] = rk[ 3] ^ rk[10];
707                         if (++i == 7) {
708                                 return 0;
709                         }
710                         temp = rk[11];
711                         rk[12] = rk[ 4] ^
712                                 (Te2[(temp >> 24)       ] & 0xff000000) ^
713                                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
714                                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
715                                 (Te1[(temp      ) & 0xff] & 0x000000ff);
716                         rk[13] = rk[ 5] ^ rk[12];
717                         rk[14] = rk[ 6] ^ rk[13];
718                         rk[15] = rk[ 7] ^ rk[14];
719
720                         rk += 8;
721                 }
722         }
723         return 0;
724 }
725
726 /**
727  * Expand the cipher key into the decryption key schedule.
728  */
729 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
730                          AES_KEY *key) {
731
732         u32 *rk;
733         int i, j, status;
734         u32 temp;
735
736         /* first, start with an encryption schedule */
737         status = AES_set_encrypt_key(userKey, bits, key);
738         if (status < 0)
739                 return status;
740
741         rk = key->rd_key;
742
743         /* invert the order of the round keys: */
744         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
745                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
746                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
747                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
748                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
749         }
750         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
751         for (i = 1; i < (key->rounds); i++) {
752                 rk += 4;
753                 rk[0] =
754                         Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
755                         Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
756                         Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
757                         Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
758                 rk[1] =
759                         Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
760                         Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
761                         Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
762                         Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
763                 rk[2] =
764                         Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
765                         Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
766                         Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
767                         Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
768                 rk[3] =
769                         Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
770                         Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
771                         Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
772                         Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
773         }
774         return 0;
775 }
776
777 /*
778  * Encrypt a single block
779  * in and out can overlap
780  */
781 void AES_encrypt(const unsigned char *in, unsigned char *out,
782                  const AES_KEY *key) {
783
784         const u32 *rk;
785         u32 s0, s1, s2, s3, t0, t1, t2, t3;
786 #ifndef FULL_UNROLL
787         int r;
788 #endif /* ?FULL_UNROLL */
789
790         assert(in && out && key);
791         rk = key->rd_key;
792
793         /*
794          * map byte array block to cipher state
795          * and add initial round key:
796          */
797         s0 = GETU32(in     ) ^ rk[0];
798         s1 = GETU32(in +  4) ^ rk[1];
799         s2 = GETU32(in +  8) ^ rk[2];
800         s3 = GETU32(in + 12) ^ rk[3];
801 #ifdef FULL_UNROLL
802         /* round 1: */
803         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
804         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
805         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
806         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
807         /* round 2: */
808         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
809         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
810         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
811         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
812         /* round 3: */
813         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
814         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
815         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
816         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
817         /* round 4: */
818         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
819         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
820         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
821         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
822         /* round 5: */
823         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
824         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
825         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
826         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
827         /* round 6: */
828         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
829         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
830         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
831         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
832         /* round 7: */
833         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
834         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
835         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
836         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
837         /* round 8: */
838         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
839         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
840         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
841         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
842         /* round 9: */
843         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
844         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
845         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
846         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
847     if (key->rounds > 10) {
848         /* round 10: */
849         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
850         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
851         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
852         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
853         /* round 11: */
854         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
855         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
856         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
857         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
858         if (key->rounds > 12) {
859             /* round 12: */
860             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
861             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
862             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
863             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
864             /* round 13: */
865             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
866             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
867             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
868             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
869         }
870     }
871     rk += key->rounds << 2;
872 #else  /* !FULL_UNROLL */
873     /*
874      * Nr - 1 full rounds:
875      */
876     r = key->rounds >> 1;
877     for (;;) {
878         t0 =
879             Te0[(s0 >> 24)       ] ^
880             Te1[(s1 >> 16) & 0xff] ^
881             Te2[(s2 >>  8) & 0xff] ^
882             Te3[(s3      ) & 0xff] ^
883             rk[4];
884         t1 =
885             Te0[(s1 >> 24)       ] ^
886             Te1[(s2 >> 16) & 0xff] ^
887             Te2[(s3 >>  8) & 0xff] ^
888             Te3[(s0      ) & 0xff] ^
889             rk[5];
890         t2 =
891             Te0[(s2 >> 24)       ] ^
892             Te1[(s3 >> 16) & 0xff] ^
893             Te2[(s0 >>  8) & 0xff] ^
894             Te3[(s1      ) & 0xff] ^
895             rk[6];
896         t3 =
897             Te0[(s3 >> 24)       ] ^
898             Te1[(s0 >> 16) & 0xff] ^
899             Te2[(s1 >>  8) & 0xff] ^
900             Te3[(s2      ) & 0xff] ^
901             rk[7];
902
903         rk += 8;
904         if (--r == 0) {
905             break;
906         }
907
908         s0 =
909             Te0[(t0 >> 24)       ] ^
910             Te1[(t1 >> 16) & 0xff] ^
911             Te2[(t2 >>  8) & 0xff] ^
912             Te3[(t3      ) & 0xff] ^
913             rk[0];
914         s1 =
915             Te0[(t1 >> 24)       ] ^
916             Te1[(t2 >> 16) & 0xff] ^
917             Te2[(t3 >>  8) & 0xff] ^
918             Te3[(t0      ) & 0xff] ^
919             rk[1];
920         s2 =
921             Te0[(t2 >> 24)       ] ^
922             Te1[(t3 >> 16) & 0xff] ^
923             Te2[(t0 >>  8) & 0xff] ^
924             Te3[(t1      ) & 0xff] ^
925             rk[2];
926         s3 =
927             Te0[(t3 >> 24)       ] ^
928             Te1[(t0 >> 16) & 0xff] ^
929             Te2[(t1 >>  8) & 0xff] ^
930             Te3[(t2      ) & 0xff] ^
931             rk[3];
932     }
933 #endif /* ?FULL_UNROLL */
934     /*
935          * apply last round and
936          * map cipher state to byte array block:
937          */
938         s0 =
939                 (Te2[(t0 >> 24)       ] & 0xff000000) ^
940                 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
941                 (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
942                 (Te1[(t3      ) & 0xff] & 0x000000ff) ^
943                 rk[0];
944         PUTU32(out     , s0);
945         s1 =
946                 (Te2[(t1 >> 24)       ] & 0xff000000) ^
947                 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
948                 (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
949                 (Te1[(t0      ) & 0xff] & 0x000000ff) ^
950                 rk[1];
951         PUTU32(out +  4, s1);
952         s2 =
953                 (Te2[(t2 >> 24)       ] & 0xff000000) ^
954                 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
955                 (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
956                 (Te1[(t1      ) & 0xff] & 0x000000ff) ^
957                 rk[2];
958         PUTU32(out +  8, s2);
959         s3 =
960                 (Te2[(t3 >> 24)       ] & 0xff000000) ^
961                 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
962                 (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
963                 (Te1[(t2      ) & 0xff] & 0x000000ff) ^
964                 rk[3];
965         PUTU32(out + 12, s3);
966 }
967
968 /*
969  * Decrypt a single block
970  * in and out can overlap
971  */
972 void AES_decrypt(const unsigned char *in, unsigned char *out,
973                  const AES_KEY *key) {
974
975         const u32 *rk;
976         u32 s0, s1, s2, s3, t0, t1, t2, t3;
977 #ifndef FULL_UNROLL
978         int r;
979 #endif /* ?FULL_UNROLL */
980
981         assert(in && out && key);
982         rk = key->rd_key;
983
984         /*
985          * map byte array block to cipher state
986          * and add initial round key:
987          */
988     s0 = GETU32(in     ) ^ rk[0];
989     s1 = GETU32(in +  4) ^ rk[1];
990     s2 = GETU32(in +  8) ^ rk[2];
991     s3 = GETU32(in + 12) ^ rk[3];
992 #ifdef FULL_UNROLL
993     /* round 1: */
994     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
995     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
996     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
997     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
998     /* round 2: */
999     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1000     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1001     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1002     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1003     /* round 3: */
1004     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1005     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1006     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1007     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1008     /* round 4: */
1009     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1010     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1011     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1012     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1013     /* round 5: */
1014     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1015     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1016     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1017     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1018     /* round 6: */
1019     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1020     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1021     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1022     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1023     /* round 7: */
1024     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1025     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1026     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1027     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1028     /* round 8: */
1029     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1030     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1031     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1032     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1033     /* round 9: */
1034     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1035     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1036     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1037     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1038     if (key->rounds > 10) {
1039         /* round 10: */
1040         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1041         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1042         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1043         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1044         /* round 11: */
1045         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1046         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1047         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1048         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1049         if (key->rounds > 12) {
1050             /* round 12: */
1051             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1052             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1053             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1054             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1055             /* round 13: */
1056             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1057             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1058             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1059             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1060         }
1061     }
1062         rk += key->rounds << 2;
1063 #else  /* !FULL_UNROLL */
1064     /*
1065      * Nr - 1 full rounds:
1066      */
1067     r = key->rounds >> 1;
1068     for (;;) {
1069         t0 =
1070             Td0[(s0 >> 24)       ] ^
1071             Td1[(s3 >> 16) & 0xff] ^
1072             Td2[(s2 >>  8) & 0xff] ^
1073             Td3[(s1      ) & 0xff] ^
1074             rk[4];
1075         t1 =
1076             Td0[(s1 >> 24)       ] ^
1077             Td1[(s0 >> 16) & 0xff] ^
1078             Td2[(s3 >>  8) & 0xff] ^
1079             Td3[(s2      ) & 0xff] ^
1080             rk[5];
1081         t2 =
1082             Td0[(s2 >> 24)       ] ^
1083             Td1[(s1 >> 16) & 0xff] ^
1084             Td2[(s0 >>  8) & 0xff] ^
1085             Td3[(s3      ) & 0xff] ^
1086             rk[6];
1087         t3 =
1088             Td0[(s3 >> 24)       ] ^
1089             Td1[(s2 >> 16) & 0xff] ^
1090             Td2[(s1 >>  8) & 0xff] ^
1091             Td3[(s0      ) & 0xff] ^
1092             rk[7];
1093
1094         rk += 8;
1095         if (--r == 0) {
1096             break;
1097         }
1098
1099         s0 =
1100             Td0[(t0 >> 24)       ] ^
1101             Td1[(t3 >> 16) & 0xff] ^
1102             Td2[(t2 >>  8) & 0xff] ^
1103             Td3[(t1      ) & 0xff] ^
1104             rk[0];
1105         s1 =
1106             Td0[(t1 >> 24)       ] ^
1107             Td1[(t0 >> 16) & 0xff] ^
1108             Td2[(t3 >>  8) & 0xff] ^
1109             Td3[(t2      ) & 0xff] ^
1110             rk[1];
1111         s2 =
1112             Td0[(t2 >> 24)       ] ^
1113             Td1[(t1 >> 16) & 0xff] ^
1114             Td2[(t0 >>  8) & 0xff] ^
1115             Td3[(t3      ) & 0xff] ^
1116             rk[2];
1117         s3 =
1118             Td0[(t3 >> 24)       ] ^
1119             Td1[(t2 >> 16) & 0xff] ^
1120             Td2[(t1 >>  8) & 0xff] ^
1121             Td3[(t0      ) & 0xff] ^
1122             rk[3];
1123     }
1124 #endif /* ?FULL_UNROLL */
1125     /*
1126          * apply last round and
1127          * map cipher state to byte array block:
1128          */
1129         s0 =
1130                 (Td4[(t0 >> 24)       ] << 24) ^
1131                 (Td4[(t3 >> 16) & 0xff] << 16) ^
1132                 (Td4[(t2 >>  8) & 0xff] <<  8) ^
1133                 (Td4[(t1      ) & 0xff])       ^
1134                 rk[0];
1135         PUTU32(out     , s0);
1136         s1 =
1137                 (Td4[(t1 >> 24)       ] << 24) ^
1138                 (Td4[(t0 >> 16) & 0xff] << 16) ^
1139                 (Td4[(t3 >>  8) & 0xff] <<  8) ^
1140                 (Td4[(t2      ) & 0xff])       ^
1141                 rk[1];
1142         PUTU32(out +  4, s1);
1143         s2 =
1144                 (Td4[(t2 >> 24)       ] << 24) ^
1145                 (Td4[(t1 >> 16) & 0xff] << 16) ^
1146                 (Td4[(t0 >>  8) & 0xff] <<  8) ^
1147                 (Td4[(t3      ) & 0xff])       ^
1148                 rk[2];
1149         PUTU32(out +  8, s2);
1150         s3 =
1151                 (Td4[(t3 >> 24)       ] << 24) ^
1152                 (Td4[(t2 >> 16) & 0xff] << 16) ^
1153                 (Td4[(t1 >>  8) & 0xff] <<  8) ^
1154                 (Td4[(t0      ) & 0xff])       ^
1155                 rk[3];
1156         PUTU32(out + 12, s3);
1157 }
1158
1159 #else /* AES_ASM */
1160
/*
 * Te4: byte-wide lookup table used by the key expansion in the AES_ASM
 * build; each looked-up byte is shifted into one lane of the new round-key
 * word.  NOTE(review): the values appear to be the AES forward S-box.
 */
1161 static const u8 Te4[256] = {
1162     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1163     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1164     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1165     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1166     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1167     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1168     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1169     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1170     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1171     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1172     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1173     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1174     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1175     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1176     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1177     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1178     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1179     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1180     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1181     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1182     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1183     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1184     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1185     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1186     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1187     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1188     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1189     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1190     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1191     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1192     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1193     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1194 };
1195 static const u32 rcon[] = {
1196         0x01000000, 0x02000000, 0x04000000, 0x08000000,
1197         0x10000000, 0x20000000, 0x40000000, 0x80000000,
1198         0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1199 };
1200
1201 /**
1202  * Expand the cipher key into the encryption key schedule.
1203  */
1204 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1205                         AES_KEY *key) {
1206         u32 *rk;
1207         int i = 0;
1208         u32 temp;
1209
1210         if (!userKey || !key)
1211                 return -1;
1212         if (bits != 128 && bits != 192 && bits != 256)
1213                 return -2;
1214
1215         rk = key->rd_key;
1216
1217         if (bits==128)
1218                 key->rounds = 10;
1219         else if (bits==192)
1220                 key->rounds = 12;
1221         else
1222                 key->rounds = 14;
1223
1224         rk[0] = GETU32(userKey     );
1225         rk[1] = GETU32(userKey +  4);
1226         rk[2] = GETU32(userKey +  8);
1227         rk[3] = GETU32(userKey + 12);
1228         if (bits == 128) {
1229                 while (1) {
1230                         temp  = rk[3];
1231                         rk[4] = rk[0] ^
1232                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1233                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1234                                 (Te4[(temp      ) & 0xff] << 8) ^
1235                                 (Te4[(temp >> 24)       ]) ^
1236                                 rcon[i];
1237                         rk[5] = rk[1] ^ rk[4];
1238                         rk[6] = rk[2] ^ rk[5];
1239                         rk[7] = rk[3] ^ rk[6];
1240                         if (++i == 10) {
1241                                 return 0;
1242                         }
1243                         rk += 4;
1244                 }
1245         }
1246         rk[4] = GETU32(userKey + 16);
1247         rk[5] = GETU32(userKey + 20);
1248         if (bits == 192) {
1249                 while (1) {
1250                         temp = rk[ 5];
1251                         rk[ 6] = rk[ 0] ^
1252                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1253                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1254                                 (Te4[(temp      ) & 0xff] << 8) ^
1255                                 (Te4[(temp >> 24)       ]) ^
1256                                 rcon[i];
1257                         rk[ 7] = rk[ 1] ^ rk[ 6];
1258                         rk[ 8] = rk[ 2] ^ rk[ 7];
1259                         rk[ 9] = rk[ 3] ^ rk[ 8];
1260                         if (++i == 8) {
1261                                 return 0;
1262                         }
1263                         rk[10] = rk[ 4] ^ rk[ 9];
1264                         rk[11] = rk[ 5] ^ rk[10];
1265                         rk += 6;
1266                 }
1267         }
1268         rk[6] = GETU32(userKey + 24);
1269         rk[7] = GETU32(userKey + 28);
1270         if (bits == 256) {
1271                 while (1) {
1272                         temp = rk[ 7];
1273                         rk[ 8] = rk[ 0] ^
1274                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1275                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1276                                 (Te4[(temp      ) & 0xff] << 8) ^
1277                                 (Te4[(temp >> 24)       ]) ^
1278                                 rcon[i];
1279                         rk[ 9] = rk[ 1] ^ rk[ 8];
1280                         rk[10] = rk[ 2] ^ rk[ 9];
1281                         rk[11] = rk[ 3] ^ rk[10];
1282                         if (++i == 7) {
1283                                 return 0;
1284                         }
1285                         temp = rk[11];
1286                         rk[12] = rk[ 4] ^
1287                                 (Te4[(temp >> 24)       ] << 24) ^
1288                                 (Te4[(temp >> 16) & 0xff] << 16) ^
1289                                 (Te4[(temp >>  8) & 0xff] << 8) ^
1290                                 (Te4[(temp      ) & 0xff]);
1291                         rk[13] = rk[ 5] ^ rk[12];
1292                         rk[14] = rk[ 6] ^ rk[13];
1293                         rk[15] = rk[ 7] ^ rk[14];
1294
1295                         rk += 8;
1296                 }
1297         }
1298         return 0;
1299 }
1300
1301 /**
1302  * Expand the cipher key into the decryption key schedule.
1303  */
1304 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1305                          AES_KEY *key) {
1306
1307         u32 *rk;
1308         int i, j, status;
1309         u32 temp;
1310
1311         /* first, start with an encryption schedule */
1312         status = AES_set_encrypt_key(userKey, bits, key);
1313         if (status < 0)
1314                 return status;
1315
1316         rk = key->rd_key;
1317
1318         /* invert the order of the round keys: */
1319         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1320                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1321                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1322                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1323                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1324         }
1325         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1326         for (i = 1; i < (key->rounds); i++) {
1327                 rk += 4;
1328                 for (j = 0; j < 4; j++) {
1329                         u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1330
1331                         tp1 = rk[j];
1332                         m = tp1 & 0x80808080;
1333                         tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1334                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1335                         m = tp2 & 0x80808080;
1336                         tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1337                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1338                         m = tp4 & 0x80808080;
1339                         tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1340                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1341                         tp9 = tp8 ^ tp1;
1342                         tpb = tp9 ^ tp2;
1343                         tpd = tp9 ^ tp4;
1344                         tpe = tp8 ^ tp4 ^ tp2;
1345 #if defined(ROTATE)
1346                         rk[j] = tpe ^ ROTATE(tpd,16) ^
1347                                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
1348 #else
1349                         rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 
1350                                 (tp9 >> 8) ^ (tp9 << 24) ^
1351                                 (tpb >> 24) ^ (tpb << 8);
1352 #endif
1353                 }
1354         }
1355         return 0;
1356 }
1357
1358 #endif /* AES_ASM */