Commit | Line | Data |
---|---|---|
5febbddd PA |
1 | #!/usr/bin/env perl |
2 | ||
3 | # ==================================================================== | |
4 | # Copyright (c) 2008 Andy Polyakov <appro@openssl.org> | |
5 | # | |
6 | # This module may be used under the terms of either the GNU General | |
7 | # Public License version 2 or later, the GNU Lesser General Public | |
8 | # License version 2.1 or later, the Mozilla Public License version | |
9 | # 1.1 or the BSD License. The exact terms of either license are | |
10 | # distributed along with this module. For further details see | |
11 | # http://www.openssl.org/~appro/camellia/. | |
12 | # ==================================================================== | |
13 | ||
14 | # Performance in cycles per processed byte (less is better) in | |
15 | # 'openssl speed ...' benchmark: | |
16 | # | |
17 | # AMD64 Core2 EM64T | |
18 | # -evp camellia-128-ecb 16.7 21.0 22.7 | |
19 | # + over gcc 3.4.6 +25% +5% 0% | |
20 | # | |
21 | # camellia-128-cbc 15.7 20.4 21.1 | |
22 | # | |
23 | # 128-bit key setup 128 216 205 cycles/key | |
24 | # + over gcc 3.4.6 +54% +39% +15% | |
25 | # | |
26 | # Numbers in "+" rows represent performance improvement over compiler | |
27 | # generated code. Key setup timings are impressive on AMD and Core2 | |
28 | # thanks to 64-bit operations being covertly deployed. Improvement on | |
29 | # EM64T, a pre-Core2 Intel x86_64 CPU, is not as impressive, because it | |
30 | # apparently emulates some of the 64-bit operations in [32-bit] microcode. | |
31 | ||
# Command line: [flavour] [output]. A lone argument that contains a dot is
# taken to be the output file name rather than an assembler flavour.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 code paths are selected by a nasm/masm/mingw64 flavour or by an
# output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the perlasm translator next to this script or in ../../perlasm/.
# $dir is explicitly empty when $0 carries no directory component instead
# of depending on whatever $1 happened to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Path arguments are quoted so that directories with spaces survive the
# shell, and a failure to start the child is fatal instead of silent.
{ my $cmd="\"$^X\" \"$xlate\"";
  $cmd.=" $flavour"	if (defined($flavour) && length($flavour));
  $cmd.=" \"$output\""	if (defined($output) && length($output));
  open OUT,"| $cmd" or die "can't call $xlate: $!";
}
*STDOUT=*OUT;
5febbddd PA |
45 | |
# hi(REG) - map a 32/64-bit legacy register name (%eax/%rax, %ebx/%rbx,
# %ecx/%rcx, %edx/%rdx) to its high-byte alias (%ah, %bh, %ch, %dh).
# Any other operand is returned unchanged.
sub hi { my $r=shift; $r =~ s/%[er]([a-d])x/%$1h/; $r; }
# lo(REG) - map a 32/64-bit register name to its low-byte alias:
#   %eax/%rax ... %edx/%rdx -> %al ... %dl
#   %esi/%rsi, %edi/%rdi    -> %sil, %dil
#   %rN / %rNd              -> %rNb
# Operands matching none of the patterns are returned unchanged.
sub lo { my $r=shift;
	$r =~ s/%[er]([a-d])x/%$1l/;
	$r =~ s/%[er]([sd]i)/%$1l/;
	$r =~ s/%(r[0-9]+)[d]?/%$1b/;
	$r; }
50 | ||
# Register allocation shared by the code generators in this file.
# $t0..$t3 are the 32-bit temporaries and $i0/$i1 the table-index registers
# used by Camellia_Feistel; @S holds the four 32-bit words of cipher state.
51 | $t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx"; | |
52 | @S=("%r8d","%r9d","%r10d","%r11d"); | |
53 | $i0="%esi"; | |
54 | $i1="%edi"; | |
# $Tbl is loaded with the address of .LCamellia_SBOX by each entry point.
55 | $Tbl="%rbp"; # size optimization | |
56 | $inp="%r12"; | |
57 | $out="%r13"; | |
58 | $key="%r14"; | |
59 | $keyend="%r15"; | |
# 32-bit view of the first argument register: %ecx when $win64 is set,
# %edi otherwise. Used by the V1.x entry points before the ABI prologue.
60 | $arg0d=$win64?"%ecx":"%edi"; | |
61 | ||
62 | # const unsigned int Camellia_SBOX[4][256]; | |
63 | # Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], | |
64 | # and [2][] - with [3][]. This is done to minimize code size. | |
# The values below are byte displacements from $Tbl; because two tables are
# interleaved, lookups index with a scale factor of 8 (see Camellia_Feistel).
65 | $SBOX1_1110=0; # Camellia_SBOX[0] | |
66 | $SBOX4_4404=4; # Camellia_SBOX[1] | |
67 | $SBOX2_0222=2048; # Camellia_SBOX[2] | |
68 | $SBOX3_3033=2052; # Camellia_SBOX[3] | |
69 | ||
# Camellia_Feistel(i [,seed])
#
# Appends the assembly for one Feistel round to $code.
#   i    - round index within the current group; its parity selects which
#          half of @S is the round input (s0,s1) and which half receives
#          the round output (s2,s3).
#   seed - byte offset relative to $key of the group's first subkey
#          (default 0). A negative seed makes the subkey prefetch walk
#          downwards (scale -8), which is what decryption uses.
# On entry $t0/$t1 must already hold the round's subkey words; the emitted
# code prefetches the next subkey into $t1/$t0 before it finishes.
# Reads the globals @S, $t0-$t3, $i0, $i1, $Tbl, $key and the $SBOX*
# displacements. The `...` spans are expanded when $code is printed.
sub Camellia_Feistel {
my ($i,$seed)=@_;
$seed=0 unless defined($seed);
my $scale=$seed<0?-8:8;
my $j=($i&1)*2;
# all four aliases are lexical, nothing leaks into the package namespace
my ($s0,$s1,$s2,$s3)=map { $S[($j+$_)%4] } (0..3);

$code.=<<___;
	xor	$s0,$t0				# t0^=key[0]
	xor	$s1,$t1				# t1^=key[1]
	movz	`&hi("$t0")`,$i0		# (t0>>8)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>0)&0xff
	mov	$SBOX3_3033($Tbl,$i0,8),$t3	# t3=SBOX3_3033[0]
	mov	$SBOX1_1110($Tbl,$i1,8),$t2	# t2=SBOX1_1110[1]
	movz	`&lo("$t0")`,$i0		# (t0>>0)&0xff
	shr	\$16,$t0
	movz	`&hi("$t1")`,$i1		# (t1>>8)&0xff
	xor	$SBOX4_4404($Tbl,$i0,8),$t3	# t3^=SBOX4_4404[0]
	shr	\$16,$t1
	xor	$SBOX4_4404($Tbl,$i1,8),$t2	# t2^=SBOX4_4404[1]
	movz	`&hi("$t0")`,$i0		# (t0>>24)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>16)&0xff
	xor	$SBOX1_1110($Tbl,$i0,8),$t3	# t3^=SBOX1_1110[0]
	xor	$SBOX3_3033($Tbl,$i1,8),$t2	# t2^=SBOX3_3033[1]
	movz	`&lo("$t0")`,$i0		# (t0>>16)&0xff
	movz	`&hi("$t1")`,$i1		# (t1>>24)&0xff
	xor	$SBOX2_0222($Tbl,$i0,8),$t3	# t3^=SBOX2_0222[0]
	xor	$SBOX2_0222($Tbl,$i1,8),$t2	# t2^=SBOX2_0222[1]
	mov	`$seed+($i+1)*$scale`($key),$t1	# prefetch key[i+1]
	mov	`$seed+($i+1)*$scale+4`($key),$t0
	xor	$t3,$t2				# t2^=t3
	ror	\$8,$t3				# t3=RightRotate(t3,8)
	xor	$t2,$s2
	xor	$t2,$s3
	xor	$t3,$s3
___
}
107 | ||
108 | # void Camellia_EncryptBlock_Rounds( | |
109 | # int grandRounds, | |
110 | # const Byte plaintext[], | |
111 | # const KEY_TABLE_TYPE keyTable, | |
112 | # Byte ciphertext[]) | |
113 | $code=<<___; | |
114 | .text | |
115 | ||
116 | # V1.x API | |
117 | .globl Camellia_EncryptBlock | |
118 | .type Camellia_EncryptBlock,\@abi-omnipotent | |
119 | .align 16 | |
120 | Camellia_EncryptBlock: | |
121 | movl \$128,%eax | |
122 | subl $arg0d,%eax | |
123 | movl \$3,$arg0d | |
124 | adcl \$0,$arg0d # keyBitLength==128?3:4 | |
125 | jmp .Lenc_rounds | |
126 | .size Camellia_EncryptBlock,.-Camellia_EncryptBlock | |
127 | # V2 | |
128 | .globl Camellia_EncryptBlock_Rounds | |
129 | .type Camellia_EncryptBlock_Rounds,\@function,4 | |
130 | .align 16 | |
131 | .Lenc_rounds: | |
132 | Camellia_EncryptBlock_Rounds: | |
133 | push %rbx | |
134 | push %rbp | |
135 | push %r13 | |
136 | push %r14 | |
137 | push %r15 | |
138 | .Lenc_prologue: | |
139 | ||
140 | #mov %rsi,$inp # put away arguments | |
141 | mov %rcx,$out | |
142 | mov %rdx,$key | |
143 | ||
144 | shl \$6,%edi # process grandRounds | |
145 | lea .LCamellia_SBOX(%rip),$Tbl | |
146 | lea ($key,%rdi),$keyend | |
147 | ||
148 | mov 0(%rsi),@S[0] # load plaintext | |
149 | mov 4(%rsi),@S[1] | |
150 | mov 8(%rsi),@S[2] | |
151 | bswap @S[0] | |
152 | mov 12(%rsi),@S[3] | |
153 | bswap @S[1] | |
154 | bswap @S[2] | |
155 | bswap @S[3] | |
156 | ||
157 | call _x86_64_Camellia_encrypt | |
158 | ||
159 | bswap @S[0] | |
160 | bswap @S[1] | |
161 | bswap @S[2] | |
162 | mov @S[0],0($out) | |
163 | bswap @S[3] | |
164 | mov @S[1],4($out) | |
165 | mov @S[2],8($out) | |
166 | mov @S[3],12($out) | |
167 | ||
168 | mov 0(%rsp),%r15 | |
169 | mov 8(%rsp),%r14 | |
170 | mov 16(%rsp),%r13 | |
171 | mov 24(%rsp),%rbp | |
172 | mov 32(%rsp),%rbx | |
173 | lea 40(%rsp),%rsp | |
174 | .Lenc_epilogue: | |
175 | ret | |
176 | .size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds | |
177 | ||
178 | .type _x86_64_Camellia_encrypt,\@abi-omnipotent | |
179 | .align 16 | |
180 | _x86_64_Camellia_encrypt: | |
181 | xor 0($key),@S[1] | |
182 | xor 4($key),@S[0] # ^=key[0-3] | |
183 | xor 8($key),@S[3] | |
184 | xor 12($key),@S[2] | |
185 | .align 16 | |
186 | .Leloop: | |
187 | mov 16($key),$t1 # prefetch key[4-5] | |
188 | mov 20($key),$t0 | |
189 | ||
190 | ___ | |
191 | for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); } | |
192 | $code.=<<___; | |
193 | lea 16*4($key),$key | |
194 | cmp $keyend,$key | |
195 | mov 8($key),$t3 # prefetch key[2-3] | |
196 | mov 12($key),$t2 | |
197 | je .Ledone | |
198 | ||
199 | and @S[0],$t0 | |
200 | or @S[3],$t3 | |
201 | rol \$1,$t0 | |
202 | xor $t3,@S[2] # s2^=s3|key[3]; | |
203 | xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); | |
204 | and @S[2],$t2 | |
205 | or @S[1],$t1 | |
206 | rol \$1,$t2 | |
207 | xor $t1,@S[0] # s0^=s1|key[1]; | |
208 | xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); | |
209 | jmp .Leloop | |
210 | ||
211 | .align 16 | |
212 | .Ledone: | |
213 | xor @S[2],$t0 # SwapHalf | |
214 | xor @S[3],$t1 | |
215 | xor @S[0],$t2 | |
216 | xor @S[1],$t3 | |
217 | ||
218 | mov $t0,@S[0] | |
219 | mov $t1,@S[1] | |
220 | mov $t2,@S[2] | |
221 | mov $t3,@S[3] | |
222 | ||
223 | .byte 0xf3,0xc3 # rep ret | |
224 | .size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt | |
225 | ||
226 | # V1.x API | |
227 | .globl Camellia_DecryptBlock | |
228 | .type Camellia_DecryptBlock,\@abi-omnipotent | |
229 | .align 16 | |
230 | Camellia_DecryptBlock: | |
231 | movl \$128,%eax | |
232 | subl $arg0d,%eax | |
233 | movl \$3,$arg0d | |
234 | adcl \$0,$arg0d # keyBitLength==128?3:4 | |
235 | jmp .Ldec_rounds | |
236 | .size Camellia_DecryptBlock,.-Camellia_DecryptBlock | |
237 | # V2 | |
238 | .globl Camellia_DecryptBlock_Rounds | |
239 | .type Camellia_DecryptBlock_Rounds,\@function,4 | |
240 | .align 16 | |
241 | .Ldec_rounds: | |
242 | Camellia_DecryptBlock_Rounds: | |
243 | push %rbx | |
244 | push %rbp | |
245 | push %r13 | |
246 | push %r14 | |
247 | push %r15 | |
248 | .Ldec_prologue: | |
249 | ||
250 | #mov %rsi,$inp # put away arguments | |
251 | mov %rcx,$out | |
252 | mov %rdx,$keyend | |
253 | ||
254 | shl \$6,%edi # process grandRounds | |
255 | lea .LCamellia_SBOX(%rip),$Tbl | |
256 | lea ($keyend,%rdi),$key | |
257 | ||
258 | mov 0(%rsi),@S[0] # load plaintext | |
259 | mov 4(%rsi),@S[1] | |
260 | mov 8(%rsi),@S[2] | |
261 | bswap @S[0] | |
262 | mov 12(%rsi),@S[3] | |
263 | bswap @S[1] | |
264 | bswap @S[2] | |
265 | bswap @S[3] | |
266 | ||
267 | call _x86_64_Camellia_decrypt | |
268 | ||
269 | bswap @S[0] | |
270 | bswap @S[1] | |
271 | bswap @S[2] | |
272 | mov @S[0],0($out) | |
273 | bswap @S[3] | |
274 | mov @S[1],4($out) | |
275 | mov @S[2],8($out) | |
276 | mov @S[3],12($out) | |
277 | ||
278 | mov 0(%rsp),%r15 | |
279 | mov 8(%rsp),%r14 | |
280 | mov 16(%rsp),%r13 | |
281 | mov 24(%rsp),%rbp | |
282 | mov 32(%rsp),%rbx | |
283 | lea 40(%rsp),%rsp | |
284 | .Ldec_epilogue: | |
285 | ret | |
286 | .size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds | |
287 | ||
288 | .type _x86_64_Camellia_decrypt,\@abi-omnipotent | |
289 | .align 16 | |
290 | _x86_64_Camellia_decrypt: | |
291 | xor 0($key),@S[1] | |
292 | xor 4($key),@S[0] # ^=key[0-3] | |
293 | xor 8($key),@S[3] | |
294 | xor 12($key),@S[2] | |
295 | .align 16 | |
296 | .Ldloop: | |
297 | mov -8($key),$t1 # prefetch key[4-5] | |
298 | mov -4($key),$t0 | |
299 | ||
300 | ___ | |
301 | for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); } | |
302 | $code.=<<___; | |
303 | lea -16*4($key),$key | |
304 | cmp $keyend,$key | |
305 | mov 0($key),$t3 # prefetch key[2-3] | |
306 | mov 4($key),$t2 | |
307 | je .Lddone | |
308 | ||
309 | and @S[0],$t0 | |
310 | or @S[3],$t3 | |
311 | rol \$1,$t0 | |
312 | xor $t3,@S[2] # s2^=s3|key[3]; | |
313 | xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); | |
314 | and @S[2],$t2 | |
315 | or @S[1],$t1 | |
316 | rol \$1,$t2 | |
317 | xor $t1,@S[0] # s0^=s1|key[1]; | |
318 | xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); | |
319 | ||
320 | jmp .Ldloop | |
321 | ||
322 | .align 16 | |
323 | .Lddone: | |
324 | xor @S[2],$t2 | |
325 | xor @S[3],$t3 | |
326 | xor @S[0],$t0 | |
327 | xor @S[1],$t1 | |
328 | ||
329 | mov $t2,@S[0] # SwapHalf | |
330 | mov $t3,@S[1] | |
331 | mov $t0,@S[2] | |
332 | mov $t1,@S[3] | |
333 | ||
334 | .byte 0xf3,0xc3 # rep ret | |
335 | .size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt | |
336 | ___ | |
337 | ||
# _saveround(rnd, key, [bias,] regs...)
#
# Appends stores of subkey registers to $code at byte offset bias+rnd*8
# from the address register `key`.
#   - four registers: 32-bit words, stored pairwise swapped
#     (regs[1],regs[0],regs[3],regs[2]) at +0,+4,+8,+12;
#   - one or two registers: stored at +0 and (if present) +8.
# The optional bias is recognised by being an integer literal, so an
# explicit bias of 0 is honoured as well (int() truthiness would mistake
# it for a register operand).
sub _saveround {
my ($rnd,$key,@T)=@_;
my $bias=(@T && $T[0] =~ /^[-+]?\d+$/)?shift(@T):0;

    if ($#T==3) {
	$code.=<<___;
	mov	$T[1],`$bias+$rnd*8+0`($key)
	mov	$T[0],`$bias+$rnd*8+4`($key)
	mov	$T[3],`$bias+$rnd*8+8`($key)
	mov	$T[2],`$bias+$rnd*8+12`($key)
___
    } else {
	$code.="	mov	$T[0],`$bias+$rnd*8+0`($key)\n";
	$code.="	mov	$T[1],`$bias+$rnd*8+8`($key)\n"	if ($#T>=1);
    }
}
354 | ||
# _loadround(rnd, key, [bias,] reg0 [,reg1])
#
# Appends loads to $code: reg0 from bias+rnd*8+0 and, when given, reg1
# from bias+rnd*8+8, both relative to the address register `key`.
# The optional bias is recognised by being an integer literal, so an
# explicit bias of 0 works too.
sub _loadround {
my ($rnd,$key,@T)=@_;
my $bias=(@T && $T[0] =~ /^[-+]?\d+$/)?shift(@T):0;

	$code.="	mov	`$bias+$rnd*8+0`($key),$T[0]\n";
	$code.="	mov	`$bias+$rnd*8+8`($key),$T[1]\n"	if ($#T>=1);
}
362 | ||
363 | # shld is very slow on Intel EM64T family. Even on AMD it limits | |
364 | # instruction decode rate [because it's VectorPath] and consequently | |
365 | # performance... | |
# __rotl128(i0, i1, rot)
#
# shld-based variant: appends code rotating the 128-bit value i0:i1 (two
# 64-bit registers, i0 most significant) left by `rot` bits. Emits nothing
# when rot is 0. %r11 is used as scratch, so neither operand may be %r11,
# and the double-shift form only works for counts below 64.
sub __rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	die "__rotl128: rotate count $rot out of range 1..63"
						if ($rot<0 || $rot>63);
	die "__rotl128: $i0/$i1 collides with scratch %r11"
						if (grep { $_ eq "%r11" } ($i0,$i1));
	$code.=<<___;
	mov	$i0,%r11
	shld	\$$rot,$i1,$i0
	shld	\$$rot,%r11,$i1
___
    }
}
377 | ||
378 | # ... Implementing 128-bit rotate without shld gives 80% better | |
379 | # performance on EM64T, +15% on AMD64 and only ~7% degradation on | |
380 | # Core2. This is therefore preferred. | |
# _rotl128(i0, i1, rot)
#
# Appends code rotating the 128-bit value i0:i1 (two 64-bit registers, i0
# most significant) left by `rot` bits using plain shifts and ORs. Emits
# nothing when rot is 0. %r9 and %r11 are clobbered as scratch, so they
# must not be passed as operands, and the shift counts rot / 64-rot are
# only meaningful for 1..63; both conditions are checked at generation
# time rather than producing silently wrong assembly.
sub _rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	die "_rotl128: rotate count $rot out of range 1..63"
						if ($rot<0 || $rot>63);
	die "_rotl128: $i0/$i1 collides with scratch %r9/%r11"
						if (grep { $_ eq "%r9" || $_ eq "%r11" } ($i0,$i1));
	$code.=<<___;
	mov	$i0,%r11
	shl	\$$rot,$i0
	mov	$i1,%r9
	shr	\$`64-$rot`,%r9
	shr	\$`64-$rot`,%r11
	or	%r9,$i0
	shl	\$$rot,$i1
	or	%r11,$i1
___
    }
}
397 | ||
# Generator for Camellia_Ekeygen (declared below as a 3-argument function).
# As emitted: the first argument (%rdi, keyBitLength) is kept in $keyend,
# the raw key is read through %rsi, and the third argument (%rdx, keyTable)
# is kept in $out. The routine returns 3 in %eax for 128-bit keys and 4
# otherwise.
# $step numbers the Camellia_Feistel invocations; with the default seed of 0
# and $key pointing at .LCamellia_SIGMA, round N prefetches the 8-byte SIGMA
# entry N+1, so the rounds walk the constant table in order.
398 | { my $step=0; | |
399 | ||
400 | $code.=<<___; | |
401 | .globl Camellia_Ekeygen | |
402 | .type Camellia_Ekeygen,\@function,3 | |
403 | .align 16 | |
404 | Camellia_Ekeygen: | |
405 | push %rbx | |
406 | push %rbp | |
407 | push %r13 | |
408 | push %r14 | |
409 | push %r15 | |
410 | .Lkey_prologue: | |
411 | ||
412 | mov %rdi,$keyend # put away arguments, keyBitLength | |
413 | mov %rdx,$out # keyTable | |
414 | ||
415 | mov 0(%rsi),@S[0] # load 0-127 bits | |
416 | mov 4(%rsi),@S[1] | |
417 | mov 8(%rsi),@S[2] | |
418 | mov 12(%rsi),@S[3] | |
419 | ||
420 | bswap @S[0] | |
421 | bswap @S[1] | |
422 | bswap @S[2] | |
423 | bswap @S[3] | |
424 | ___ | |
425 | &_saveround (0,$out,@S); # KL<<<0 | |
426 | $code.=<<___; | |
427 | cmp \$128,$keyend # check keyBitLength | |
428 | je .L1st128 | |
429 | ||
430 | mov 16(%rsi),@S[0] # load 128-191 bits | |
431 | mov 20(%rsi),@S[1] | |
432 | cmp \$192,$keyend | |
433 | je .L1st192 | |
434 | mov 24(%rsi),@S[2] # load 192-255 bits | |
435 | mov 28(%rsi),@S[3] | |
436 | jmp .L1st256 | |
437 | .L1st192: | |
438 | mov @S[0],@S[2] | |
439 | mov @S[1],@S[3] | |
440 | not @S[2] | |
441 | not @S[3] | |
442 | .L1st256: | |
443 | bswap @S[0] | |
444 | bswap @S[1] | |
445 | bswap @S[2] | |
446 | bswap @S[3] | |
447 | ___ | |
# KR is parked in table slots 4-5 (byte offset 32) until the final schedule
# overwrites them.
448 | &_saveround (4,$out,@S); # temp storage for KR! | |
449 | $code.=<<___; | |
450 | xor 0($out),@S[1] # KR^KL | |
451 | xor 4($out),@S[0] | |
452 | xor 8($out),@S[3] | |
453 | xor 12($out),@S[2] | |
454 | ||
455 | .L1st128: | |
456 | lea .LCamellia_SIGMA(%rip),$key | |
457 | lea .LCamellia_SBOX(%rip),$Tbl | |
458 | ||
459 | mov 0($key),$t1 | |
460 | mov 4($key),$t0 | |
461 | ___ | |
# Feistel rounds 0 and 1, then XOR with KL, then rounds 2 and 3: after this
# the state words hold KA.
462 | &Camellia_Feistel($step++); | |
463 | &Camellia_Feistel($step++); | |
464 | $code.=<<___; | |
465 | xor 0($out),@S[1] # ^KL | |
466 | xor 4($out),@S[0] | |
467 | xor 8($out),@S[3] | |
468 | xor 12($out),@S[2] | |
469 | ___ | |
470 | &Camellia_Feistel($step++); | |
471 | &Camellia_Feistel($step++); | |
472 | $code.=<<___; | |
473 | cmp \$128,$keyend | |
474 | jne .L2nd256 | |
475 | ||
476 | lea 128($out),$out # size optimization | |
477 | shl \$32,%r8 # @S[0]|| | |
478 | shl \$32,%r10 # @S[2]|| | |
479 | or %r9,%r8 # ||@S[1] | |
480 | or %r11,%r10 # ||@S[3] | |
481 | ___ | |
# 128-bit key schedule. $out was advanced by 128 above, hence the -128 bias
# on every access. KL lives in %rax:%rbx and KA in %r8:%r10; each _rotl128
# rotates in place, so the trailing comments track the accumulated rotation.
482 | &_loadround (0,$out,-128,"%rax","%rbx"); # KL | |
483 | &_saveround (2,$out,-128,"%r8","%r10"); # KA<<<0 | |
484 | &_rotl128 ("%rax","%rbx",15); | |
485 | &_saveround (4,$out,-128,"%rax","%rbx"); # KL<<<15 | |
486 | &_rotl128 ("%r8","%r10",15); | |
487 | &_saveround (6,$out,-128,"%r8","%r10"); # KA<<<15 | |
488 | &_rotl128 ("%r8","%r10",15); # 15+15=30 | |
489 | &_saveround (8,$out,-128,"%r8","%r10"); # KA<<<30 | |
490 | &_rotl128 ("%rax","%rbx",30); # 15+30=45 | |
491 | &_saveround (10,$out,-128,"%rax","%rbx"); # KL<<<45 | |
492 | &_rotl128 ("%r8","%r10",15); # 30+15=45 | |
493 | &_saveround (12,$out,-128,"%r8"); # KA<<<45 | |
494 | &_rotl128 ("%rax","%rbx",15); # 45+15=60 | |
495 | &_saveround (13,$out,-128,"%rbx"); # KL<<<60 | |
496 | &_rotl128 ("%r8","%r10",15); # 45+15=60 | |
497 | &_saveround (14,$out,-128,"%r8","%r10"); # KA<<<60 | |
498 | &_rotl128 ("%rax","%rbx",17); # 60+17=77 | |
499 | &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<77 | |
500 | &_rotl128 ("%rax","%rbx",17); # 77+17=94 | |
501 | &_saveround (18,$out,-128,"%rax","%rbx"); # KL<<<94 | |
502 | &_rotl128 ("%r8","%r10",34); # 60+34=94 | |
503 | &_saveround (20,$out,-128,"%r8","%r10"); # KA<<<94 | |
504 | &_rotl128 ("%rax","%rbx",17); # 94+17=111 | |
505 | &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<111 | |
506 | &_rotl128 ("%r8","%r10",17); # 94+17=111 | |
507 | &_saveround (24,$out,-128,"%r8","%r10"); # KA<<<111 | |
508 | $code.=<<___; | |
509 | mov \$3,%eax | |
510 | jmp .Ldone | |
511 | .align 16 | |
512 | .L2nd256: | |
513 | ___ | |
514 | &_saveround (6,$out,@S); # temp storage for KA! | |
515 | $code.=<<___; | |
516 | xor `4*8+0`($out),@S[1] # KA^KR | |
517 | xor `4*8+4`($out),@S[0] | |
518 | xor `5*8+0`($out),@S[3] | |
519 | xor `5*8+4`($out),@S[2] | |
520 | ___ | |
# Feistel rounds 4 and 5 turn KA^KR into KB.
521 | &Camellia_Feistel($step++); | |
522 | &Camellia_Feistel($step++); | |
523 | ||
# 192/256-bit key schedule. Loading KA into %r14:%r15 overwrites $key and
# $keyend; neither is read again by this routine.
524 | &_loadround (0,$out,"%rax","%rbx"); # KL | |
525 | &_loadround (4,$out,"%rcx","%rdx"); # KR | |
526 | &_loadround (6,$out,"%r14","%r15"); # KA | |
527 | $code.=<<___; | |
528 | lea 128($out),$out # size optimization | |
529 | shl \$32,%r8 # @S[0]|| | |
530 | shl \$32,%r10 # @S[2]|| | |
531 | or %r9,%r8 # ||@S[1] | |
532 | or %r11,%r10 # ||@S[3] | |
533 | ___ | |
534 | &_saveround (2,$out,-128,"%r8","%r10"); # KB<<<0 | |
535 | &_rotl128 ("%rcx","%rdx",15); | |
536 | &_saveround (4,$out,-128,"%rcx","%rdx"); # KR<<<15 | |
537 | &_rotl128 ("%r14","%r15",15); | |
538 | &_saveround (6,$out,-128,"%r14","%r15"); # KA<<<15 | |
539 | &_rotl128 ("%rcx","%rdx",15); # 15+15=30 | |
540 | &_saveround (8,$out,-128,"%rcx","%rdx"); # KR<<<30 | |
541 | &_rotl128 ("%r8","%r10",30); | |
542 | &_saveround (10,$out,-128,"%r8","%r10"); # KB<<<30 | |
543 | &_rotl128 ("%rax","%rbx",45); | |
544 | &_saveround (12,$out,-128,"%rax","%rbx"); # KL<<<45 | |
545 | &_rotl128 ("%r14","%r15",30); # 15+30=45 | |
546 | &_saveround (14,$out,-128,"%r14","%r15"); # KA<<<45 | |
547 | &_rotl128 ("%rax","%rbx",15); # 45+15=60 | |
548 | &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<60 | |
549 | &_rotl128 ("%rcx","%rdx",30); # 30+30=60 | |
550 | &_saveround (18,$out,-128,"%rcx","%rdx"); # KR<<<60 | |
551 | &_rotl128 ("%r8","%r10",30); # 30+30=60 | |
552 | &_saveround (20,$out,-128,"%r8","%r10"); # KB<<<60 | |
553 | &_rotl128 ("%rax","%rbx",17); # 60+17=77 | |
554 | &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<77 | |
555 | &_rotl128 ("%r14","%r15",32); # 45+32=77 | |
556 | &_saveround (24,$out,-128,"%r14","%r15"); # KA<<<77 | |
557 | &_rotl128 ("%rcx","%rdx",34); # 60+34=94 | |
558 | &_saveround (26,$out,-128,"%rcx","%rdx"); # KR<<<94 | |
559 | &_rotl128 ("%r14","%r15",17); # 77+17=94 | |
560 | &_saveround (28,$out,-128,"%r14","%r15"); # KA<<<94 | |
561 | &_rotl128 ("%rax","%rbx",34); # 77+34=111 | |
562 | &_saveround (30,$out,-128,"%rax","%rbx"); # KL<<<111 | |
563 | &_rotl128 ("%r8","%r10",51); # 60+51=111 | |
564 | &_saveround (32,$out,-128,"%r8","%r10"); # KB<<<111 | |
565 | $code.=<<___; | |
566 | mov \$4,%eax | |
567 | .Ldone: | |
568 | mov 0(%rsp),%r15 | |
569 | mov 8(%rsp),%r14 | |
570 | mov 16(%rsp),%r13 | |
571 | mov 24(%rsp),%rbp | |
572 | mov 32(%rsp),%rbx | |
573 | lea 40(%rsp),%rsp | |
574 | .Lkey_epilogue: | |
575 | ret | |
576 | .size Camellia_Ekeygen,.-Camellia_Ekeygen | |
577 | ___ | |
578 | } | |
579 | ||
# 256-entry byte substitution table. S1110, S4404, S0222 and S3033 below
# derive the four 32-bit lookup tables emitted at .LCamellia_SBOX from it.
580 | @SBOX=( | |
581 | 112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, | |
582 | 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, | |
583 | 134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, | |
584 | 166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, | |
585 | 139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, | |
586 | 223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, | |
587 | 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, | |
588 | 254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, | |
589 | 170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, | |
590 | 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, | |
591 | 135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, | |
592 | 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, | |
593 | 233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, | |
594 | 120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, | |
595 | 114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, | |
596 | 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); | |
597 | ||
# Derived-table generators. Each takes an index 0..255 and returns the
# corresponding 32-bit table word formatted as "0x%08x". With s=$SBOX[...]:
#   S1110(i): s=SBOX[i],           word = s,s,s,0 (bytes, most significant first)
#   S4404(i): s=SBOX[rotl8(i,1)],  word = s,s,0,s
#   S0222(i): s=rotl8(SBOX[i],1),  word = 0,s,s,s
#   S3033(i): s=rotr8(SBOX[i],1),  word = s,0,s,s
sub S1110 { my $i=shift; $i=$SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); }
sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=$SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); }
sub S0222 { my $i=shift; $i=$SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); }
sub S3033 { my $i=shift; $i=$SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); }
602 | ||
# Constant data: .LCamellia_SIGMA holds the constants that Camellia_Ekeygen
# feeds to its Feistel rounds as subkeys, followed by one all-zero entry;
# .LCamellia_SBOX follows immediately and is filled in by the loops below.
603 | $code.=<<___; | |
604 | .align 64 | |
605 | .LCamellia_SIGMA: | |
606 | .long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 | |
607 | .long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 | |
608 | .long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 | |
609 | .long 0, 0, 0, 0 | |
610 | .LCamellia_SBOX: | |
611 | ___ | |
612 | # tables are interleaved, remember? | |
# data_word(LIST) appends one ".long a,b,..." directive to $code.
613 | sub data_word { $code.=".long\t".join(',',@_)."\n"; } | |
# First 256 rows pair S1110 with S4404 (displacements 0 and 4), the next 256
# rows pair S0222 with S3033 (displacements 2048 and 2052); each row is 8
# bytes, matching the scale-8 indexing used by Camellia_Feistel.
614 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } | |
615 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } | |
616 | ||
617 | # void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out, | |
618 | # size_t length, const CAMELLIA_KEY *key, | |
619 | # unsigned char *ivp,const int enc); | |
620 | { | |
621 | $_key="0(%rsp)"; | |
622 | $_end="8(%rsp)"; # inp+len&~15 | |
623 | $_res="16(%rsp)"; # len&15 | |
624 | $ivec="24(%rsp)"; | |
625 | $_ivp="40(%rsp)"; | |
626 | $_rsp="48(%rsp)"; | |
627 | ||
628 | $code.=<<___; | |
629 | .globl Camellia_cbc_encrypt | |
630 | .type Camellia_cbc_encrypt,\@function,6 | |
631 | .align 16 | |
632 | Camellia_cbc_encrypt: | |
633 | cmp \$0,%rdx | |
634 | je .Lcbc_abort | |
635 | push %rbx | |
636 | push %rbp | |
637 | push %r12 | |
638 | push %r13 | |
639 | push %r14 | |
640 | push %r15 | |
641 | .Lcbc_prologue: | |
642 | ||
643 | mov %rsp,%rbp | |
644 | sub \$64,%rsp | |
645 | and \$-64,%rsp | |
646 | ||
647 | # place stack frame just "above mod 1024" the key schedule, | |
648 | # this ensures that cache associativity suffices | |
649 | lea -64-63(%rcx),%r10 | |
650 | sub %rsp,%r10 | |
651 | neg %r10 | |
652 | and \$0x3C0,%r10 | |
653 | sub %r10,%rsp | |
654 | #add \$8,%rsp # 8 is reserved for callee's ra | |
655 | ||
656 | mov %rdi,$inp # inp argument | |
657 | mov %rsi,$out # out argument | |
658 | mov %r8,%rbx # ivp argument | |
659 | mov %rcx,$key # key argument | |
660 | mov 272(%rcx),${keyend}d # grandRounds | |
661 | ||
662 | mov %r8,$_ivp | |
663 | mov %rbp,$_rsp | |
664 | ||
665 | .Lcbc_body: | |
666 | lea .LCamellia_SBOX(%rip),$Tbl | |
667 | ||
668 | mov \$32,%ecx | |
669 | .align 4 | |
670 | .Lcbc_prefetch_sbox: | |
671 | mov 0($Tbl),%rax | |
672 | mov 32($Tbl),%rsi | |
673 | mov 64($Tbl),%rdi | |
674 | mov 96($Tbl),%r11 | |
675 | lea 128($Tbl),$Tbl | |
676 | loop .Lcbc_prefetch_sbox | |
677 | sub \$4096,$Tbl | |
678 | shl \$6,$keyend | |
679 | mov %rdx,%rcx # len argument | |
680 | lea ($key,$keyend),$keyend | |
681 | ||
682 | cmp \$0,%r9d # enc argument | |
683 | je .LCBC_DECRYPT | |
684 | ||
685 | and \$-16,%rdx | |
686 | and \$15,%rcx # length residue | |
687 | lea ($inp,%rdx),%rdx | |
688 | mov $key,$_key | |
689 | mov %rdx,$_end | |
690 | mov %rcx,$_res | |
691 | ||
692 | cmp $inp,%rdx | |
693 | mov 0(%rbx),@S[0] # load IV | |
694 | mov 4(%rbx),@S[1] | |
695 | mov 8(%rbx),@S[2] | |
696 | mov 12(%rbx),@S[3] | |
697 | je .Lcbc_enc_tail | |
698 | jmp .Lcbc_eloop | |
699 | ||
700 | .align 16 | |
701 | .Lcbc_eloop: | |
702 | xor 0($inp),@S[0] | |
703 | xor 4($inp),@S[1] | |
704 | xor 8($inp),@S[2] | |
705 | bswap @S[0] | |
706 | xor 12($inp),@S[3] | |
707 | bswap @S[1] | |
708 | bswap @S[2] | |
709 | bswap @S[3] | |
710 | ||
711 | call _x86_64_Camellia_encrypt | |
712 | ||
713 | mov $_key,$key # "rewind" the key | |
714 | bswap @S[0] | |
715 | mov $_end,%rdx | |
716 | bswap @S[1] | |
717 | mov $_res,%rcx | |
718 | bswap @S[2] | |
719 | mov @S[0],0($out) | |
720 | bswap @S[3] | |
721 | mov @S[1],4($out) | |
722 | mov @S[2],8($out) | |
723 | lea 16($inp),$inp | |
724 | mov @S[3],12($out) | |
725 | cmp %rdx,$inp | |
726 | lea 16($out),$out | |
727 | jne .Lcbc_eloop | |
728 | ||
729 | cmp \$0,%rcx | |
730 | jne .Lcbc_enc_tail | |
731 | ||
732 | mov $_ivp,$out | |
733 | mov @S[0],0($out) # write out IV residue | |
734 | mov @S[1],4($out) | |
735 | mov @S[2],8($out) | |
736 | mov @S[3],12($out) | |
737 | jmp .Lcbc_done | |
738 | ||
739 | .align 16 | |
740 | .Lcbc_enc_tail: | |
741 | xor %rax,%rax | |
742 | mov %rax,0+$ivec | |
743 | mov %rax,8+$ivec | |
744 | mov %rax,$_res | |
745 | ||
746 | .Lcbc_enc_pushf: | |
747 | pushfq | |
748 | cld | |
749 | mov $inp,%rsi | |
750 | lea 8+$ivec,%rdi | |
751 | .long 0x9066A4F3 # rep movsb | |
752 | popfq | |
753 | .Lcbc_enc_popf: | |
754 | ||
755 | lea $ivec,$inp | |
756 | lea 16+$ivec,%rax | |
757 | mov %rax,$_end | |
758 | jmp .Lcbc_eloop # one more time | |
759 | ||
760 | .align 16 | |
761 | .LCBC_DECRYPT: | |
762 | xchg $key,$keyend | |
763 | add \$15,%rdx | |
764 | and \$15,%rcx # length residue | |
765 | and \$-16,%rdx | |
766 | mov $key,$_key | |
767 | lea ($inp,%rdx),%rdx | |
768 | mov %rdx,$_end | |
769 | mov %rcx,$_res | |
770 | ||
771 | mov (%rbx),%rax # load IV | |
772 | mov 8(%rbx),%rbx | |
773 | jmp .Lcbc_dloop | |
774 | .align 16 | |
775 | .Lcbc_dloop: | |
776 | mov 0($inp),@S[0] | |
777 | mov 4($inp),@S[1] | |
778 | mov 8($inp),@S[2] | |
779 | bswap @S[0] | |
780 | mov 12($inp),@S[3] | |
781 | bswap @S[1] | |
782 | mov %rax,0+$ivec # save IV to temporary storage | |
783 | bswap @S[2] | |
784 | mov %rbx,8+$ivec | |
785 | bswap @S[3] | |
786 | ||
787 | call _x86_64_Camellia_decrypt | |
788 | ||
789 | mov $_key,$key # "rewind" the key | |
790 | mov $_end,%rdx | |
791 | mov $_res,%rcx | |
792 | ||
793 | bswap @S[0] | |
794 | mov ($inp),%rax # load IV for next iteration | |
795 | bswap @S[1] | |
796 | mov 8($inp),%rbx | |
797 | bswap @S[2] | |
798 | xor 0+$ivec,@S[0] | |
799 | bswap @S[3] | |
800 | xor 4+$ivec,@S[1] | |
801 | xor 8+$ivec,@S[2] | |
802 | lea 16($inp),$inp | |
803 | xor 12+$ivec,@S[3] | |
804 | cmp %rdx,$inp | |
805 | je .Lcbc_ddone | |
806 | ||
807 | mov @S[0],0($out) | |
808 | mov @S[1],4($out) | |
809 | mov @S[2],8($out) | |
810 | mov @S[3],12($out) | |
811 | ||
812 | lea 16($out),$out | |
813 | jmp .Lcbc_dloop | |
814 | ||
815 | .align 16 | |
816 | .Lcbc_ddone: | |
817 | mov $_ivp,%rdx | |
818 | cmp \$0,%rcx | |
819 | jne .Lcbc_dec_tail | |
820 | ||
821 | mov @S[0],0($out) | |
822 | mov @S[1],4($out) | |
823 | mov @S[2],8($out) | |
824 | mov @S[3],12($out) | |
825 | ||
826 | mov %rax,(%rdx) # write out IV residue | |
827 | mov %rbx,8(%rdx) | |
828 | jmp .Lcbc_done | |
829 | .align 16 | |
830 | .Lcbc_dec_tail: | |
831 | mov @S[0],0+$ivec | |
832 | mov @S[1],4+$ivec | |
833 | mov @S[2],8+$ivec | |
834 | mov @S[3],12+$ivec | |
835 | ||
836 | .Lcbc_dec_pushf: | |
837 | pushfq | |
838 | cld | |
839 | lea 8+$ivec,%rsi | |
840 | lea ($out),%rdi | |
841 | .long 0x9066A4F3 # rep movsb | |
842 | popfq | |
843 | .Lcbc_dec_popf: | |
844 | ||
845 | mov %rax,(%rdx) # write out IV residue | |
846 | mov %rbx,8(%rdx) | |
847 | jmp .Lcbc_done | |
848 | ||
849 | .align 16 | |
850 | .Lcbc_done: | |
851 | mov $_rsp,%rcx | |
852 | mov 0(%rcx),%r15 | |
853 | mov 8(%rcx),%r14 | |
854 | mov 16(%rcx),%r13 | |
855 | mov 24(%rcx),%r12 | |
856 | mov 32(%rcx),%rbp | |
857 | mov 40(%rcx),%rbx | |
858 | lea 48(%rcx),%rsp | |
859 | .Lcbc_abort: | |
860 | ret | |
861 | .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt | |
862 | ||
863 | .asciz "Camellia for x86_64 by <appro\@openssl.org>" | |
864 | ___ | |
865 | } | |
866 | ||
867 | # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, | |
868 | # CONTEXT *context,DISPATCHER_CONTEXT *disp) | |
869 | if ($win64) { | |
870 | $rec="%rcx"; | |
871 | $frame="%rdx"; | |
872 | $context="%r8"; | |
873 | $disp="%r9"; | |
874 | ||
875 | $code.=<<___; | |
876 | .extern __imp_RtlVirtualUnwind | |
877 | .type common_se_handler,\@abi-omnipotent | |
878 | .align 16 | |
879 | common_se_handler: | |
880 | push %rsi | |
881 | push %rdi | |
882 | push %rbx | |
883 | push %rbp | |
884 | push %r12 | |
885 | push %r13 | |
886 | push %r14 | |
887 | push %r15 | |
888 | pushfq | |
889 | lea -64(%rsp),%rsp | |
890 | ||
891 | mov 120($context),%rax # pull context->Rax | |
892 | mov 248($context),%rbx # pull context->Rip | |
893 | ||
894 | mov 8($disp),%rsi # disp->ImageBase | |
895 | mov 56($disp),%r11 # disp->HandlerData | |
896 | ||
897 | mov 0(%r11),%r10d # HandlerData[0] | |
898 | lea (%rsi,%r10),%r10 # prologue label | |
899 | cmp %r10,%rbx # context->Rip<prologue label | |
900 | jb .Lin_prologue | |
901 | ||
902 | mov 152($context),%rax # pull context->Rsp | |
903 | ||
904 | mov 4(%r11),%r10d # HandlerData[1] | |
905 | lea (%rsi,%r10),%r10 # epilogue label | |
906 | cmp %r10,%rbx # context->Rip>=epilogue label | |
907 | jae .Lin_prologue | |
908 | ||
909 | lea 40(%rax),%rax | |
910 | mov -8(%rax),%rbx | |
911 | mov -16(%rax),%rbp | |
912 | mov -24(%rax),%r13 | |
913 | mov -32(%rax),%r14 | |
914 | mov -40(%rax),%r15 | |
915 | mov %rbx,144($context) # restore context->Rbx | |
916 | mov %rbp,160($context) # restore context->Rbp | |
917 | mov %r13,224($context) # restore context->R13 | |
918 | mov %r14,232($context) # restore context->R14 | |
919 | mov %r15,240($context) # restore context->R15 | |
920 | ||
921 | .Lin_prologue: | |
922 | mov 8(%rax),%rdi | |
923 | mov 16(%rax),%rsi | |
924 | mov %rax,152($context) # restore context->Rsp | |
925 | mov %rsi,168($context) # restore context->Rsi | |
926 | mov %rdi,176($context) # restore context->Rdi | |
927 | ||
928 | jmp .Lcommon_seh_exit | |
929 | .size common_se_handler,.-common_se_handler | |
930 | ||
931 | .type cbc_se_handler,\@abi-omnipotent | |
932 | .align 16 | |
933 | cbc_se_handler: | |
934 | push %rsi | |
935 | push %rdi | |
936 | push %rbx | |
937 | push %rbp | |
938 | push %r12 | |
939 | push %r13 | |
940 | push %r14 | |
941 | push %r15 | |
942 | pushfq | |
943 | lea -64(%rsp),%rsp | |
944 | ||
945 | mov 120($context),%rax # pull context->Rax | |
946 | mov 248($context),%rbx # pull context->Rip | |
947 | ||
948 | lea .Lcbc_prologue(%rip),%r10 | |
949 | cmp %r10,%rbx # context->Rip<.Lcbc_prologue | |
950 | jb .Lin_cbc_prologue | |
951 | ||
952 | lea .Lcbc_body(%rip),%r10 | |
953 | cmp %r10,%rbx # context->Rip<.Lcbc_body | |
954 | jb .Lin_cbc_frame_setup | |
955 | ||
956 | mov 152($context),%rax # pull context->Rsp | |
957 | ||
958 | lea .Lcbc_abort(%rip),%r10 | |
959 | cmp %r10,%rbx # context->Rip>=.Lcbc_abort | |
960 | jae .Lin_cbc_prologue | |
961 | ||
962 | # handle pushf/popf in Camellia_cbc_encrypt | |
963 | lea .Lcbc_enc_pushf(%rip),%r10 | |
964 | cmp %r10,%rbx # context->Rip<=.Lcbc_enc_pushf | |
965 | jbe .Lin_cbc_no_flag | |
966 | lea 8(%rax),%rax | |
967 | lea .Lcbc_enc_popf(%rip),%r10 | |
968 | cmp %r10,%rbx # context->Rip<.Lcbc_enc_popf | |
969 | jb .Lin_cbc_no_flag | |
970 | lea -8(%rax),%rax | |
971 | lea .Lcbc_dec_pushf(%rip),%r10 | |
972 | cmp %r10,%rbx # context->Rip<=.Lcbc_dec_pushf | |
973 | jbe .Lin_cbc_no_flag | |
974 | lea 8(%rax),%rax | |
975 | lea .Lcbc_dec_popf(%rip),%r10 | |
976 | cmp %r10,%rbx # context->Rip<.Lcbc_dec_popf | |
977 | jb .Lin_cbc_no_flag | |
978 | lea -8(%rax),%rax | |
979 | ||
980 | .Lin_cbc_no_flag: | |
981 | mov 48(%rax),%rax # $_rsp | |
982 | lea 48(%rax),%rax | |
983 | ||
984 | .Lin_cbc_frame_setup: | |
985 | mov -8(%rax),%rbx | |
986 | mov -16(%rax),%rbp | |
987 | mov -24(%rax),%r12 | |
988 | mov -32(%rax),%r13 | |
989 | mov -40(%rax),%r14 | |
990 | mov -48(%rax),%r15 | |
991 | mov %rbx,144($context) # restore context->Rbx | |
992 | mov %rbp,160($context) # restore context->Rbp | |
993 | mov %r12,216($context) # restore context->R12 | |
994 | mov %r13,224($context) # restore context->R13 | |
995 | mov %r14,232($context) # restore context->R14 | |
996 | mov %r15,240($context) # restore context->R15 | |
997 | ||
998 | .Lin_cbc_prologue: | |
999 | mov 8(%rax),%rdi | |
1000 | mov 16(%rax),%rsi | |
1001 | mov %rax,152($context) # restore context->Rsp | |
1002 | mov %rsi,168($context) # restore context->Rsi | |
1003 | mov %rdi,176($context) # restore context->Rdi | |
1004 | ||
1005 | .align 4 | |
1006 | .Lcommon_seh_exit: | |
1007 | ||
1008 | mov 40($disp),%rdi # disp->ContextRecord | |
1009 | mov $context,%rsi # context | |
1010 | mov \$`1232/8`,%ecx # sizeof(CONTEXT) | |
1011 | .long 0xa548f3fc # cld; rep movsq | |
1012 | ||
1013 | mov $disp,%rsi | |
1014 | xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER | |
1015 | mov 8(%rsi),%rdx # arg2, disp->ImageBase | |
1016 | mov 0(%rsi),%r8 # arg3, disp->ControlPc | |
1017 | mov 16(%rsi),%r9 # arg4, disp->FunctionEntry | |
1018 | mov 40(%rsi),%r10 # disp->ContextRecord | |
1019 | lea 56(%rsi),%r11 # &disp->HandlerData | |
1020 | lea 24(%rsi),%r12 # &disp->EstablisherFrame | |
1021 | mov %r10,32(%rsp) # arg5 | |
1022 | mov %r11,40(%rsp) # arg6 | |
1023 | mov %r12,48(%rsp) # arg7 | |
1024 | mov %rcx,56(%rsp) # arg8, (NULL) | |
1025 | call *__imp_RtlVirtualUnwind(%rip) | |
1026 | ||
1027 | mov \$1,%eax # ExceptionContinueSearch | |
1028 | lea 64(%rsp),%rsp | |
1029 | popfq | |
1030 | pop %r15 | |
1031 | pop %r14 | |
1032 | pop %r13 | |
1033 | pop %r12 | |
1034 | pop %rbp | |
1035 | pop %rbx | |
1036 | pop %rdi | |
1037 | pop %rsi | |
1038 | ret | |
1039 | .size cbc_se_handler,.-cbc_se_handler | |
1040 | ||
1041 | .section .pdata | |
1042 | .align 4 | |
1043 | .rva .LSEH_begin_Camellia_EncryptBlock_Rounds | |
1044 | .rva .LSEH_end_Camellia_EncryptBlock_Rounds | |
1045 | .rva .LSEH_info_Camellia_EncryptBlock_Rounds | |
1046 | ||
1047 | .rva .LSEH_begin_Camellia_DecryptBlock_Rounds | |
1048 | .rva .LSEH_end_Camellia_DecryptBlock_Rounds | |
1049 | .rva .LSEH_info_Camellia_DecryptBlock_Rounds | |
1050 | ||
1051 | .rva .LSEH_begin_Camellia_Ekeygen | |
1052 | .rva .LSEH_end_Camellia_Ekeygen | |
1053 | .rva .LSEH_info_Camellia_Ekeygen | |
1054 | ||
1055 | .rva .LSEH_begin_Camellia_cbc_encrypt | |
1056 | .rva .LSEH_end_Camellia_cbc_encrypt | |
1057 | .rva .LSEH_info_Camellia_cbc_encrypt | |
1058 | ||
1059 | .section .xdata | |
1060 | .align 8 | |
1061 | .LSEH_info_Camellia_EncryptBlock_Rounds: | |
1062 | .byte 9,0,0,0 | |
1063 | .rva common_se_handler | |
1064 | .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[] | |
1065 | .LSEH_info_Camellia_DecryptBlock_Rounds: | |
1066 | .byte 9,0,0,0 | |
1067 | .rva common_se_handler | |
1068 | .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] | |
1069 | .LSEH_info_Camellia_Ekeygen: | |
1070 | .byte 9,0,0,0 | |
1071 | .rva common_se_handler | |
1072 | .rva .Lkey_prologue,.Lkey_epilogue # HandlerData[] | |
1073 | .LSEH_info_Camellia_cbc_encrypt: | |
1074 | .byte 9,0,0,0 | |
1075 | .rva cbc_se_handler | |
1076 | ___ | |
1077 | } | |
1078 | ||
# Expand every `...` span in the accumulated assembly by evaluating its
# contents as Perl (offset arithmetic and the &hi()/&lo() register
# renaming), then stream the result through the translator pipe.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is the pipe to the translator: its close status is the only place
# a failed child or a short write becomes visible, so it must be checked.
close STDOUT or die "error closing STDOUT: $!";