OpenSSL: Add pre-generated asm files to the build.
author Peter Avalos <pavalos@dragonflybsd.org>
Sun, 21 Nov 2010 06:25:10 +0000 (20:25 -1000)
committer Peter Avalos <pavalos@dragonflybsd.org>
Mon, 22 Nov 2010 11:11:35 +0000 (01:11 -1000)
This causes measurable performance increases.

The Makefile in asm/ will generate the .s files from the vendor-provided
.pl files when a new version of OpenSSL is imported.
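
A minimal regeneration sketch (assuming the vendor sources are present at the
OPENSSL_SRC path used by that Makefile; run it once per architecture):

	cd secure/lib/libcrypto/asm
	make MACHINE_ARCH=i386
	make MACHINE_ARCH=x86_64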

Dragonfly-bug: <http://bugs.dragonflybsd.org/issue1542>

31 files changed:
secure/lib/libcrypto/Makefile
secure/lib/libcrypto/Makefile.inc
secure/lib/libcrypto/asm/Makefile [new file with mode: 0644]
secure/lib/libcrypto/asm/aes-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/aes-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/bf-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/bn-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/cmll-x86.s [new file with mode: 0644]
secure/lib/libcrypto/asm/cmll-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/co-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/crypt586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/des-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/md5-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/md5-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/rc4-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/rc4-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/rmd-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha1-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha1-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha256-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha256-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha512-586.s [new file with mode: 0644]
secure/lib/libcrypto/asm/sha512-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/wp-mmx.s [new file with mode: 0644]
secure/lib/libcrypto/asm/wp-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86-mont.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86_64-mont.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86_64cpuid.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86cpuid.s [new file with mode: 0644]
secure/lib/libcrypto/opensslconf-i386.h
secure/lib/libcrypto/opensslconf-x86_64.h

index 8e2be1b..8b374cd 100644
@@ -8,6 +8,10 @@ NOLINT=                true
 
 SUBDIR=        engines
 
+.if ${MACHINE_ARCH} == "x86_64"
+LDFLAGS+=-Wl,-Bsymbolic
+.endif
+
 .if exists(Makefile.man)
 .include "Makefile.man"
 .endif
@@ -16,12 +20,22 @@ SUBDIR=     engines
 
 # base sources
 SRCS=  cpt_err.c cryptlib.c cversion.c ebcdic.c ex_data.c \
-       mem.c mem_clr.c mem_dbg.c o_dir.c o_time.c uid.c
+       mem.c mem_dbg.c o_dir.c o_time.c uid.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= x86cpuid.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= x86_64cpuid.s
+.endif
 INCS=  ../e_os.h ../e_os2.h crypto.h ebcdic.h opensslv.h ossl_typ.h symhacks.h
 
 # aes
-SRCS+= aes_cbc.c aes_cfb.c aes_core.c aes_ctr.c aes_ecb.c aes_ige.c \
+SRCS+= aes_cfb.c aes_ctr.c aes_ecb.c aes_ige.c \
        aes_misc.c aes_ofb.c aes_wrap.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= aes-586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= aes-x86_64.s
+.endif
 INCS+= aes.h aes_locl.h
 
 # asn1
@@ -42,7 +56,12 @@ SRCS+=       a_bitstr.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c \
 INCS+= asn1.h asn1_mac.h asn1t.h
 
 # bf
-SRCS+= bf_cfb64.c bf_ecb.c bf_enc.c bf_ofb64.c bf_skey.c
+SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= bf-586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= bf_enc.c
+.endif
 INCS+= blowfish.h
 
 # bio
@@ -53,12 +72,17 @@ SRCS+=      b_dump.c b_print.c b_sock.c bf_buff.c bf_lbuf.c bf_nbio.c \
 INCS+= bio.h
 
 # bn
-SRCS+= bn_add.c bn_asm.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c \
+SRCS+= bn_add.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c \
        bn_div.c bn_err.c bn_exp.c \
        bn_exp2.c bn_gcd.c bn_gf2m.c bn_kron.c bn_lib.c bn_mod.c bn_mont.c \
        bn_mpi.c bn_mul.c bn_nist.c \
        bn_prime.c bn_print.c bn_rand.c bn_recp.c \
        bn_shift.c bn_sqr.c bn_sqrt.c bn_word.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= bn-586.s co-586.s x86-mont.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= x86_64-gcc.c x86_64-mont.s
+.endif
 INCS+= bn.h
 
 # buffer
@@ -66,8 +90,13 @@ SRCS+=       buf_err.c buffer.c
 INCS+= buffer.h
 
 # camellia
-SRCS+= camellia.c cmll_cbc.c cmll_cfb.c \
-       cmll_ctr.c cmll_ecb.c cmll_misc.c cmll_ofb.c
+SRCS+= cmll_cfb.c \
+       cmll_ctr.c cmll_ecb.c cmll_ofb.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= cmll-x86.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= cmll-x86_64.s cmll_misc.c
+.endif
 INCS+= camellia.h
 
 # cast
@@ -89,11 +118,16 @@ INCS+=     conf.h conf_api.h
 
 # des
 SRCS+= cbc3_enc.c cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
-       des_enc.c des_old.c des_old2.c \
+       des_old.c des_old2.c \
        ecb3_enc.c ecb_enc.c ede_cbcm_enc.c \
-       enc_read.c enc_writ.c fcrypt.c fcrypt_b.c ofb64ede.c ofb64enc.c \
+       enc_read.c enc_writ.c fcrypt.c ofb64ede.c ofb64enc.c \
        ofb_enc.c pcbc_enc.c qud_cksm.c rand_key.c read2pwd.c \
        rpc_enc.c set_key.c str2key.c xcbc_enc.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= des-586.s crypt586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= des_enc.c fcrypt_b.c
+.endif
 INCS+= des.h des_old.h
 
 # dh
@@ -182,6 +216,11 @@ INCS+=     md4.h
 
 # md5
 SRCS+= md5_dgst.c md5_one.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= md5-586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= md5-x86_64.s
+.endif
 INCS+= md5.h
 
 # mdc2
@@ -232,7 +271,11 @@ SRCS+=     rc2_cbc.c rc2_ecb.c rc2_skey.c rc2cfb64.c rc2ofb64.c
 INCS+= rc2.h
 
 # rc4
-SRCS+= rc4_enc.c rc4_skey.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= rc4-586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= rc4-x86_64.s
+.endif
 INCS+= rc4.h
 
 # rc5
@@ -242,6 +285,9 @@ INCS+=      rc4.h
 
 # ripemd
 SRCS+= rmd_dgst.c rmd_one.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= rmd-586.s
+.endif
 INCS+= ripemd.h
 
 # rsa
@@ -258,6 +304,11 @@ INCS+=     seed.h
 
 # sha
 SRCS+= sha1_one.c sha1dgst.c sha256.c sha512.c sha_dgst.c sha_one.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= sha1-586.s sha256-586.s sha512-586.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= sha1-x86_64.s sha256-x86_64.s sha512-x86_64.s
+.endif
 INCS+= sha.h
 
 # stack
@@ -283,7 +334,12 @@ SRCS+=     ui_compat.c ui_err.c ui_lib.c ui_openssl.c ui_util.c
 INCS+= ui.h ui_compat.h ui_locl.h
 
 # whrlpool
-SRCS+= wp_block.c wp_dgst.c
+SRCS+= wp_dgst.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= wp_block.c wp-mmx.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= wp-x86_64.s
+.endif
 INCS+= whrlpool.h
 
 # x509
@@ -327,12 +383,14 @@ opensslconf.h: opensslconf-${MACHINE_ARCH}.h
 .include <bsd.lib.mk>
 
 .PATH: \
+       ${.CURDIR}/asm \
        ${LCRYPTO_SRC}/crypto \
        ${LCRYPTO_SRC}/crypto/aes \
        ${LCRYPTO_SRC}/crypto/asn1 \
        ${LCRYPTO_SRC}/crypto/bf \
        ${LCRYPTO_SRC}/crypto/bio \
        ${LCRYPTO_SRC}/crypto/bn \
+       ${LCRYPTO_SRC}/crypto/bn/asm \
        ${LCRYPTO_SRC}/crypto/buffer \
        ${LCRYPTO_SRC}/crypto/camellia \
        ${LCRYPTO_SRC}/crypto/cast \
index 0396610..e03d19d 100644
@@ -8,6 +8,11 @@ LCRYPTO_DOC=   ${LCRYPTO_SRC}/doc
 
 CFLAGS+=       -DDSO_DLFCN -DHAVE_DLFCN_H -DL_ENDIAN -DTERMIOS
 CFLAGS+=       -DOPENSSL_THREADS
+CFLAGS+=       -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT -DSHA1_ASM \
+               -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DWHIRLPOOL_ASM
+.if ${MACHINE_ARCH} == "i386"
+CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS -DRMD160_ASM
+.endif
 CFLAGS+=       -I${LCRYPTO_SRC} -I${LCRYPTO_SRC}/crypto \
                -I${LCRYPTO_SRC}/crypto/asn1 -I${LCRYPTO_SRC}/crypto/evp \
                -I${LCRYPTO_SRC}/crypto/engine -I${.OBJDIR}
diff --git a/secure/lib/libcrypto/asm/Makefile b/secure/lib/libcrypto/asm/Makefile
new file mode 100644
index 0000000..9bba3dd
--- /dev/null
@@ -0,0 +1,95 @@
+# This Makefile generates the asm .s files after a new version of OpenSSL
+# has been imported.  Run it once with MACHINE_ARCH=i386 and once with
+# MACHINE_ARCH=x86_64 so that both sets of files get regenerated.
+
+OPENSSL_SRC=   ../../../../crypto/openssl
+
+.PATH: ${OPENSSL_SRC}/crypto \
+       ${OPENSSL_SRC}/crypto/aes/asm \
+       ${OPENSSL_SRC}/crypto/bf/asm \
+       ${OPENSSL_SRC}/crypto/bn/asm \
+       ${OPENSSL_SRC}/crypto/camellia/asm \
+       ${OPENSSL_SRC}/crypto/des/asm \
+       ${OPENSSL_SRC}/crypto/md5/asm \
+       ${OPENSSL_SRC}/crypto/perlasm \
+       ${OPENSSL_SRC}/crypto/rc4/asm \
+       ${OPENSSL_SRC}/crypto/ripemd/asm \
+       ${OPENSSL_SRC}/crypto/sha/asm \
+       ${OPENSSL_SRC}/crypto/whrlpool/asm
+
+.if ${MACHINE_ARCH} == "i386"
+# cpuid
+SRCS=  x86cpuid.pl
+
+# aes
+SRCS+= aes-586.pl
+
+# bf
+SRCS+= bf-586.pl
+
+# bn
+SRCS+= bn-586.pl co-586.pl x86-mont.pl
+
+# camellia
+SRCS+= cmll-x86.pl
+
+# des
+SRCS+= des-586.pl crypt586.pl
+
+# md5
+SRCS+= md5-586.pl
+
+# rc4
+SRCS+= rc4-586.pl
+
+# ripemd
+SRCS+= rmd-586.pl
+
+# sha
+SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl
+
+# whrlpool
+SRCS+= wp-mmx.pl
+
+PERLFLAGS=     ${CFLAGS}
+
+.elif ${MACHINE_ARCH} == "x86_64"
+# cpuid
+SRCS=  x86_64cpuid.pl
+
+# aes
+SRCS+= aes-x86_64.pl
+
+# bn
+SRCS+= x86_64-mont.pl
+
+# camellia
+SRCS+= cmll-x86_64.pl
+
+# md5
+SRCS+= md5-x86_64.pl
+
+# rc4
+SRCS+= rc4-x86_64.pl
+
+# sha
+SRCS+= sha1-x86_64.pl sha256-x86_64.pl sha512-x86_64.pl
+
+# whrlpool
+SRCS+= wp-x86_64.pl
+
+PERLFLAGS=
+.endif
+
+all:   ${SRCS:S/.pl$/.s/}
+
+CLEANFILES+=   ${SRCS:S/.pl$/.s/}
+.SUFFIXES:     .pl
+
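+# sha256-x86_64.s and sha512-x86_64.s are both generated from the single
+# sha512-x86_64.pl script, which emits SHA-256 or SHA-512 code depending on
+# the name of the requested output file, hence the explicit rule here rather
+# than the .pl.s suffix rule.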
+sha256-x86_64.s sha512-x86_64.s: ${OPENSSL_SRC}/crypto/sha/asm/sha512-x86_64.pl
+       perl ${.ALLSRC} elf ${.TARGET}
+
+.pl.s:
+       perl ${.IMPSRC} elf ${PERLFLAGS} > ${.TARGET}
+
+.include <bsd.prog.mk>
diff --git a/secure/lib/libcrypto/asm/aes-586.s b/secure/lib/libcrypto/asm/aes-586.s
new file mode 100644
index 0000000..dca6d0d
--- /dev/null
@@ -0,0 +1,3236 @@
+.file  "aes-586.s"
+.text
+.type  _x86_AES_encrypt_compact,@function
+.align 16
+_x86_AES_encrypt_compact:
+       movl    %edi,20(%esp)
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+       movl    -128(%ebp),%edi
+       movl    -96(%ebp),%esi
+       movl    -64(%ebp),%edi
+       movl    -32(%ebp),%esi
+       movl    (%ebp),%edi
+       movl    32(%ebp),%esi
+       movl    64(%ebp),%edi
+       movl    96(%ebp),%esi
+.align 16
+.L000loop:
+       movl    %eax,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+
+       movl    %ebx,%esi
+       andl    $255,%esi
+       shrl    $16,%ebx
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %ch,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+
+       movl    %ecx,%esi
+       andl    $255,%esi
+       shrl    $24,%ecx
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %dh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edx
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+
+       andl    $255,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       movzbl  %ah,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $8,%eax
+       xorl    %eax,%edx
+       movl    4(%esp),%eax
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%edx
+       movl    8(%esp),%ebx
+       movzbl  -128(%ebp,%ecx,1),%ecx
+       shll    $24,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+
+       movl    %ecx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ecx,%ecx,1),%edi
+       subl    %ebp,%esi
+       andl    $4278124286,%edi
+       andl    $454761243,%esi
+       movl    %ecx,%ebp
+       xorl    %edi,%esi
+       xorl    %esi,%ecx
+       roll    $24,%ecx
+       xorl    %esi,%ecx
+       rorl    $16,%ebp
+       xorl    %ebp,%ecx
+       rorl    $8,%ebp
+       xorl    %ebp,%ecx
+       movl    %edx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%edx,%edx,1),%edi
+       subl    %ebp,%esi
+       andl    $4278124286,%edi
+       andl    $454761243,%esi
+       movl    %edx,%ebp
+       xorl    %edi,%esi
+       xorl    %esi,%edx
+       roll    $24,%edx
+       xorl    %esi,%edx
+       rorl    $16,%ebp
+       xorl    %ebp,%edx
+       rorl    $8,%ebp
+       xorl    %ebp,%edx
+       movl    %eax,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%eax,%eax,1),%edi
+       subl    %ebp,%esi
+       andl    $4278124286,%edi
+       andl    $454761243,%esi
+       movl    %eax,%ebp
+       xorl    %edi,%esi
+       xorl    %esi,%eax
+       roll    $24,%eax
+       xorl    %esi,%eax
+       rorl    $16,%ebp
+       xorl    %ebp,%eax
+       rorl    $8,%ebp
+       xorl    %ebp,%eax
+       movl    %ebx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ebx,%ebx,1),%edi
+       subl    %ebp,%esi
+       andl    $4278124286,%edi
+       andl    $454761243,%esi
+       movl    %ebx,%ebp
+       xorl    %edi,%esi
+       xorl    %esi,%ebx
+       roll    $24,%ebx
+       xorl    %esi,%ebx
+       rorl    $16,%ebp
+       xorl    %ebp,%ebx
+       rorl    $8,%ebp
+       xorl    %ebp,%ebx
+       movl    20(%esp),%edi
+       movl    28(%esp),%ebp
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       cmpl    24(%esp),%edi
+       movl    %edi,20(%esp)
+       jb      .L000loop
+       movl    %eax,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+
+       movl    %ebx,%esi
+       andl    $255,%esi
+       shrl    $16,%ebx
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %ch,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+
+       movl    %ecx,%esi
+       andl    $255,%esi
+       shrl    $24,%ecx
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %dh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edx
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+
+       movl    20(%esp),%edi
+       andl    $255,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       movzbl  %ah,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $8,%eax
+       xorl    %eax,%edx
+       movl    4(%esp),%eax
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%edx
+       movl    8(%esp),%ebx
+       movzbl  -128(%ebp,%ecx,1),%ecx
+       shll    $24,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+
+       xorl    16(%edi),%eax
+       xorl    20(%edi),%ebx
+       xorl    24(%edi),%ecx
+       xorl    28(%edi),%edx
+       ret
+.size  _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact
+.type  _sse_AES_encrypt_compact,@function
+.align 16
+_sse_AES_encrypt_compact:
+       pxor    (%edi),%mm0
+       pxor    8(%edi),%mm4
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+       movl    $454761243,%eax
+       movl    %eax,8(%esp)
+       movl    %eax,12(%esp)
+       movl    -128(%ebp),%eax
+       movl    -96(%ebp),%ebx
+       movl    -64(%ebp),%ecx
+       movl    -32(%ebp),%edx
+       movl    (%ebp),%eax
+       movl    32(%ebp),%ebx
+       movl    64(%ebp),%ecx
+       movl    96(%ebp),%edx
+.align 16
+.L001loop:
+       pshufw  $8,%mm0,%mm1
+       pshufw  $13,%mm4,%mm5
+       movd    %mm1,%eax
+       movd    %mm5,%ebx
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%ecx
+       pshufw  $13,%mm0,%mm2
+       movzbl  %ah,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       shll    $8,%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%ecx
+       pshufw  $8,%mm4,%mm6
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%edx
+       shrl    $16,%ebx
+       movzbl  %ah,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $8,%esi
+       orl     %esi,%ecx
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%ecx
+       movd    %ecx,%mm0
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%ecx
+       movd    %mm2,%eax
+       movzbl  %bl,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%ecx
+       movd    %mm6,%ebx
+       movzbl  %ah,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%ecx
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $8,%esi
+       orl     %esi,%ecx
+       movd    %ecx,%mm1
+       movzbl  %bl,%esi
+       movzbl  -128(%ebp,%esi,1),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%ecx
+       shrl    $16,%eax
+       punpckldq       %mm1,%mm0
+       movzbl  %ah,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%ecx
+       andl    $255,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $16,%eax
+       orl     %eax,%edx
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $8,%esi
+       orl     %esi,%ecx
+       movd    %ecx,%mm4
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       orl     %ebx,%edx
+       movd    %edx,%mm5
+       punpckldq       %mm5,%mm4
+       addl    $16,%edi
+       cmpl    24(%esp),%edi
+       ja      .L002out
+       movq    8(%esp),%mm2
+       pxor    %mm3,%mm3
+       pxor    %mm7,%mm7
+       movq    %mm0,%mm1
+       movq    %mm4,%mm5
+       pcmpgtb %mm0,%mm3
+       pcmpgtb %mm4,%mm7
+       pand    %mm2,%mm3
+       pand    %mm2,%mm7
+       pshufw  $177,%mm0,%mm2
+       pshufw  $177,%mm4,%mm6
+       paddb   %mm0,%mm0
+       paddb   %mm4,%mm4
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pshufw  $177,%mm2,%mm3
+       pshufw  $177,%mm6,%mm7
+       pxor    %mm0,%mm1
+       pxor    %mm4,%mm5
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       movq    %mm3,%mm2
+       movq    %mm7,%mm6
+       pslld   $8,%mm3
+       pslld   $8,%mm7
+       psrld   $24,%mm2
+       psrld   $24,%mm6
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       movq    %mm1,%mm3
+       movq    %mm5,%mm7
+       movq    (%edi),%mm2
+       movq    8(%edi),%mm6
+       psrld   $8,%mm1
+       psrld   $8,%mm5
+       movl    -128(%ebp),%eax
+       pslld   $24,%mm3
+       pslld   $24,%mm7
+       movl    -64(%ebp),%ebx
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       movl    (%ebp),%ecx
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       movl    64(%ebp),%edx
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       jmp     .L001loop
+.align 16
+.L002out:
+       pxor    (%edi),%mm0
+       pxor    8(%edi),%mm4
+       ret
+.size  _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact
+.type  _x86_AES_encrypt,@function
+.align 16
+_x86_AES_encrypt:
+       movl    %edi,20(%esp)
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+.align 16
+.L003loop:
+       movl    %eax,%esi
+       andl    $255,%esi
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %bh,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       xorl    1(%ebp,%edi,8),%esi
+       movl    %esi,4(%esp)
+
+       movl    %ebx,%esi
+       andl    $255,%esi
+       shrl    $16,%ebx
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %ch,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movl    %eax,%edi
+       shrl    $24,%edi
+       xorl    1(%ebp,%edi,8),%esi
+       movl    %esi,8(%esp)
+
+       movl    %ecx,%esi
+       andl    $255,%esi
+       shrl    $24,%ecx
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %dh,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edx
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movzbl  %bh,%edi
+       xorl    1(%ebp,%edi,8),%esi
+
+       movl    20(%esp),%edi
+       movl    (%ebp,%edx,8),%edx
+       movzbl  %ah,%eax
+       xorl    3(%ebp,%eax,8),%edx
+       movl    4(%esp),%eax
+       andl    $255,%ebx
+       xorl    2(%ebp,%ebx,8),%edx
+       movl    8(%esp),%ebx
+       xorl    1(%ebp,%ecx,8),%edx
+       movl    %esi,%ecx
+
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       cmpl    24(%esp),%edi
+       movl    %edi,20(%esp)
+       jb      .L003loop
+       movl    %eax,%esi
+       andl    $255,%esi
+       movl    2(%ebp,%esi,8),%esi
+       andl    $255,%esi
+       movzbl  %bh,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $65280,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $16711680,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movl    2(%ebp,%edi,8),%edi
+       andl    $4278190080,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+       movl    %ebx,%esi
+       andl    $255,%esi
+       shrl    $16,%ebx
+       movl    2(%ebp,%esi,8),%esi
+       andl    $255,%esi
+       movzbl  %ch,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $65280,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $16711680,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $24,%edi
+       movl    2(%ebp,%edi,8),%edi
+       andl    $4278190080,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+       movl    %ecx,%esi
+       andl    $255,%esi
+       shrl    $24,%ecx
+       movl    2(%ebp,%esi,8),%esi
+       andl    $255,%esi
+       movzbl  %dh,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $65280,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edx
+       andl    $255,%edi
+       movl    (%ebp,%edi,8),%edi
+       andl    $16711680,%edi
+       xorl    %edi,%esi
+       movzbl  %bh,%edi
+       movl    2(%ebp,%edi,8),%edi
+       andl    $4278190080,%edi
+       xorl    %edi,%esi
+       movl    20(%esp),%edi
+       andl    $255,%edx
+       movl    2(%ebp,%edx,8),%edx
+       andl    $255,%edx
+       movzbl  %ah,%eax
+       movl    (%ebp,%eax,8),%eax
+       andl    $65280,%eax
+       xorl    %eax,%edx
+       movl    4(%esp),%eax
+       andl    $255,%ebx
+       movl    (%ebp,%ebx,8),%ebx
+       andl    $16711680,%ebx
+       xorl    %ebx,%edx
+       movl    8(%esp),%ebx
+       movl    2(%ebp,%ecx,8),%ecx
+       andl    $4278190080,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       ret
+.align 64
+.LAES_Te:
+.long  2774754246,2774754246
+.long  2222750968,2222750968
+.long  2574743534,2574743534
+.long  2373680118,2373680118
+.long  234025727,234025727
+.long  3177933782,3177933782
+.long  2976870366,2976870366
+.long  1422247313,1422247313
+.long  1345335392,1345335392
+.long  50397442,50397442
+.long  2842126286,2842126286
+.long  2099981142,2099981142
+.long  436141799,436141799
+.long  1658312629,1658312629
+.long  3870010189,3870010189
+.long  2591454956,2591454956
+.long  1170918031,1170918031
+.long  2642575903,2642575903
+.long  1086966153,1086966153
+.long  2273148410,2273148410
+.long  368769775,368769775
+.long  3948501426,3948501426
+.long  3376891790,3376891790
+.long  200339707,200339707
+.long  3970805057,3970805057
+.long  1742001331,1742001331
+.long  4255294047,4255294047
+.long  3937382213,3937382213
+.long  3214711843,3214711843
+.long  4154762323,4154762323
+.long  2524082916,2524082916
+.long  1539358875,1539358875
+.long  3266819957,3266819957
+.long  486407649,486407649
+.long  2928907069,2928907069
+.long  1780885068,1780885068
+.long  1513502316,1513502316
+.long  1094664062,1094664062
+.long  49805301,49805301
+.long  1338821763,1338821763
+.long  1546925160,1546925160
+.long  4104496465,4104496465
+.long  887481809,887481809
+.long  150073849,150073849
+.long  2473685474,2473685474
+.long  1943591083,1943591083
+.long  1395732834,1395732834
+.long  1058346282,1058346282
+.long  201589768,201589768
+.long  1388824469,1388824469
+.long  1696801606,1696801606
+.long  1589887901,1589887901
+.long  672667696,672667696
+.long  2711000631,2711000631
+.long  251987210,251987210
+.long  3046808111,3046808111
+.long  151455502,151455502
+.long  907153956,907153956
+.long  2608889883,2608889883
+.long  1038279391,1038279391
+.long  652995533,652995533
+.long  1764173646,1764173646
+.long  3451040383,3451040383
+.long  2675275242,2675275242
+.long  453576978,453576978
+.long  2659418909,2659418909
+.long  1949051992,1949051992
+.long  773462580,773462580
+.long  756751158,756751158
+.long  2993581788,2993581788
+.long  3998898868,3998898868
+.long  4221608027,4221608027
+.long  4132590244,4132590244
+.long  1295727478,1295727478
+.long  1641469623,1641469623
+.long  3467883389,3467883389
+.long  2066295122,2066295122
+.long  1055122397,1055122397
+.long  1898917726,1898917726
+.long  2542044179,2542044179
+.long  4115878822,4115878822
+.long  1758581177,1758581177
+.long  0,0
+.long  753790401,753790401
+.long  1612718144,1612718144
+.long  536673507,536673507
+.long  3367088505,3367088505
+.long  3982187446,3982187446
+.long  3194645204,3194645204
+.long  1187761037,1187761037
+.long  3653156455,3653156455
+.long  1262041458,1262041458
+.long  3729410708,3729410708
+.long  3561770136,3561770136
+.long  3898103984,3898103984
+.long  1255133061,1255133061
+.long  1808847035,1808847035
+.long  720367557,720367557
+.long  3853167183,3853167183
+.long  385612781,385612781
+.long  3309519750,3309519750
+.long  3612167578,3612167578
+.long  1429418854,1429418854
+.long  2491778321,2491778321
+.long  3477423498,3477423498
+.long  284817897,284817897
+.long  100794884,100794884
+.long  2172616702,2172616702
+.long  4031795360,4031795360
+.long  1144798328,1144798328
+.long  3131023141,3131023141
+.long  3819481163,3819481163
+.long  4082192802,4082192802
+.long  4272137053,4272137053
+.long  3225436288,3225436288
+.long  2324664069,2324664069
+.long  2912064063,2912064063
+.long  3164445985,3164445985
+.long  1211644016,1211644016
+.long  83228145,83228145
+.long  3753688163,3753688163
+.long  3249976951,3249976951
+.long  1977277103,1977277103
+.long  1663115586,1663115586
+.long  806359072,806359072
+.long  452984805,452984805
+.long  250868733,250868733
+.long  1842533055,1842533055
+.long  1288555905,1288555905
+.long  336333848,336333848
+.long  890442534,890442534
+.long  804056259,804056259
+.long  3781124030,3781124030
+.long  2727843637,2727843637
+.long  3427026056,3427026056
+.long  957814574,957814574
+.long  1472513171,1472513171
+.long  4071073621,4071073621
+.long  2189328124,2189328124
+.long  1195195770,1195195770
+.long  2892260552,2892260552
+.long  3881655738,3881655738
+.long  723065138,723065138
+.long  2507371494,2507371494
+.long  2690670784,2690670784
+.long  2558624025,2558624025
+.long  3511635870,3511635870
+.long  2145180835,2145180835
+.long  1713513028,1713513028
+.long  2116692564,2116692564
+.long  2878378043,2878378043
+.long  2206763019,2206763019
+.long  3393603212,3393603212
+.long  703524551,703524551
+.long  3552098411,3552098411
+.long  1007948840,1007948840
+.long  2044649127,2044649127
+.long  3797835452,3797835452
+.long  487262998,487262998
+.long  1994120109,1994120109
+.long  1004593371,1004593371
+.long  1446130276,1446130276
+.long  1312438900,1312438900
+.long  503974420,503974420
+.long  3679013266,3679013266
+.long  168166924,168166924
+.long  1814307912,1814307912
+.long  3831258296,3831258296
+.long  1573044895,1573044895
+.long  1859376061,1859376061
+.long  4021070915,4021070915
+.long  2791465668,2791465668
+.long  2828112185,2828112185
+.long  2761266481,2761266481
+.long  937747667,937747667
+.long  2339994098,2339994098
+.long  854058965,854058965
+.long  1137232011,1137232011
+.long  1496790894,1496790894
+.long  3077402074,3077402074
+.long  2358086913,2358086913
+.long  1691735473,1691735473
+.long  3528347292,3528347292
+.long  3769215305,3769215305
+.long  3027004632,3027004632
+.long  4199962284,4199962284
+.long  133494003,133494003
+.long  636152527,636152527
+.long  2942657994,2942657994
+.long  2390391540,2390391540
+.long  3920539207,3920539207
+.long  403179536,403179536
+.long  3585784431,3585784431
+.long  2289596656,2289596656
+.long  1864705354,1864705354
+.long  1915629148,1915629148
+.long  605822008,605822008
+.long  4054230615,4054230615
+.long  3350508659,3350508659
+.long  1371981463,1371981463
+.long  602466507,602466507
+.long  2094914977,2094914977
+.long  2624877800,2624877800
+.long  555687742,555687742
+.long  3712699286,3712699286
+.long  3703422305,3703422305
+.long  2257292045,2257292045
+.long  2240449039,2240449039
+.long  2423288032,2423288032
+.long  1111375484,1111375484
+.long  3300242801,3300242801
+.long  2858837708,2858837708
+.long  3628615824,3628615824
+.long  84083462,84083462
+.long  32962295,32962295
+.long  302911004,302911004
+.long  2741068226,2741068226
+.long  1597322602,1597322602
+.long  4183250862,4183250862
+.long  3501832553,3501832553
+.long  2441512471,2441512471
+.long  1489093017,1489093017
+.long  656219450,656219450
+.long  3114180135,3114180135
+.long  954327513,954327513
+.long  335083755,335083755
+.long  3013122091,3013122091
+.long  856756514,856756514
+.long  3144247762,3144247762
+.long  1893325225,1893325225
+.long  2307821063,2307821063
+.long  2811532339,2811532339
+.long  3063651117,3063651117
+.long  572399164,572399164
+.long  2458355477,2458355477
+.long  552200649,552200649
+.long  1238290055,1238290055
+.long  4283782570,4283782570
+.long  2015897680,2015897680
+.long  2061492133,2061492133
+.long  2408352771,2408352771
+.long  4171342169,4171342169
+.long  2156497161,2156497161
+.long  386731290,386731290
+.long  3669999461,3669999461
+.long  837215959,837215959
+.long  3326231172,3326231172
+.long  3093850320,3093850320
+.long  3275833730,3275833730
+.long  2962856233,2962856233
+.long  1999449434,1999449434
+.long  286199582,286199582
+.long  3417354363,3417354363
+.long  4233385128,4233385128
+.long  3602627437,3602627437
+.long  974525996,974525996
+.byte  99,124,119,123,242,107,111,197
+.byte  48,1,103,43,254,215,171,118
+.byte  202,130,201,125,250,89,71,240
+.byte  173,212,162,175,156,164,114,192
+.byte  183,253,147,38,54,63,247,204
+.byte  52,165,229,241,113,216,49,21
+.byte  4,199,35,195,24,150,5,154
+.byte  7,18,128,226,235,39,178,117
+.byte  9,131,44,26,27,110,90,160
+.byte  82,59,214,179,41,227,47,132
+.byte  83,209,0,237,32,252,177,91
+.byte  106,203,190,57,74,76,88,207
+.byte  208,239,170,251,67,77,51,133
+.byte  69,249,2,127,80,60,159,168
+.byte  81,163,64,143,146,157,56,245
+.byte  188,182,218,33,16,255,243,210
+.byte  205,12,19,236,95,151,68,23
+.byte  196,167,126,61,100,93,25,115
+.byte  96,129,79,220,34,42,144,136
+.byte  70,238,184,20,222,94,11,219
+.byte  224,50,58,10,73,6,36,92
+.byte  194,211,172,98,145,149,228,121
+.byte  231,200,55,109,141,213,78,169
+.byte  108,86,244,234,101,122,174,8
+.byte  186,120,37,46,28,166,180,198
+.byte  232,221,116,31,75,189,139,138
+.byte  112,62,181,102,72,3,246,14
+.byte  97,53,87,185,134,193,29,158
+.byte  225,248,152,17,105,217,142,148
+.byte  155,30,135,233,206,85,40,223
+.byte  140,161,137,13,191,230,66,104
+.byte  65,153,45,15,176,84,187,22
+.byte  99,124,119,123,242,107,111,197
+.byte  48,1,103,43,254,215,171,118
+.byte  202,130,201,125,250,89,71,240
+.byte  173,212,162,175,156,164,114,192
+.byte  183,253,147,38,54,63,247,204
+.byte  52,165,229,241,113,216,49,21
+.byte  4,199,35,195,24,150,5,154
+.byte  7,18,128,226,235,39,178,117
+.byte  9,131,44,26,27,110,90,160
+.byte  82,59,214,179,41,227,47,132
+.byte  83,209,0,237,32,252,177,91
+.byte  106,203,190,57,74,76,88,207
+.byte  208,239,170,251,67,77,51,133
+.byte  69,249,2,127,80,60,159,168
+.byte  81,163,64,143,146,157,56,245
+.byte  188,182,218,33,16,255,243,210
+.byte  205,12,19,236,95,151,68,23
+.byte  196,167,126,61,100,93,25,115
+.byte  96,129,79,220,34,42,144,136
+.byte  70,238,184,20,222,94,11,219
+.byte  224,50,58,10,73,6,36,92
+.byte  194,211,172,98,145,149,228,121
+.byte  231,200,55,109,141,213,78,169
+.byte  108,86,244,234,101,122,174,8
+.byte  186,120,37,46,28,166,180,198
+.byte  232,221,116,31,75,189,139,138
+.byte  112,62,181,102,72,3,246,14
+.byte  97,53,87,185,134,193,29,158
+.byte  225,248,152,17,105,217,142,148
+.byte  155,30,135,233,206,85,40,223
+.byte  140,161,137,13,191,230,66,104
+.byte  65,153,45,15,176,84,187,22
+.byte  99,124,119,123,242,107,111,197
+.byte  48,1,103,43,254,215,171,118
+.byte  202,130,201,125,250,89,71,240
+.byte  173,212,162,175,156,164,114,192
+.byte  183,253,147,38,54,63,247,204
+.byte  52,165,229,241,113,216,49,21
+.byte  4,199,35,195,24,150,5,154
+.byte  7,18,128,226,235,39,178,117
+.byte  9,131,44,26,27,110,90,160
+.byte  82,59,214,179,41,227,47,132
+.byte  83,209,0,237,32,252,177,91
+.byte  106,203,190,57,74,76,88,207
+.byte  208,239,170,251,67,77,51,133
+.byte  69,249,2,127,80,60,159,168
+.byte  81,163,64,143,146,157,56,245
+.byte  188,182,218,33,16,255,243,210
+.byte  205,12,19,236,95,151,68,23
+.byte  196,167,126,61,100,93,25,115
+.byte  96,129,79,220,34,42,144,136
+.byte  70,238,184,20,222,94,11,219
+.byte  224,50,58,10,73,6,36,92
+.byte  194,211,172,98,145,149,228,121
+.byte  231,200,55,109,141,213,78,169
+.byte  108,86,244,234,101,122,174,8
+.byte  186,120,37,46,28,166,180,198
+.byte  232,221,116,31,75,189,139,138
+.byte  112,62,181,102,72,3,246,14
+.byte  97,53,87,185,134,193,29,158
+.byte  225,248,152,17,105,217,142,148
+.byte  155,30,135,233,206,85,40,223
+.byte  140,161,137,13,191,230,66,104
+.byte  65,153,45,15,176,84,187,22
+.byte  99,124,119,123,242,107,111,197
+.byte  48,1,103,43,254,215,171,118
+.byte  202,130,201,125,250,89,71,240
+.byte  173,212,162,175,156,164,114,192
+.byte  183,253,147,38,54,63,247,204
+.byte  52,165,229,241,113,216,49,21
+.byte  4,199,35,195,24,150,5,154
+.byte  7,18,128,226,235,39,178,117
+.byte  9,131,44,26,27,110,90,160
+.byte  82,59,214,179,41,227,47,132
+.byte  83,209,0,237,32,252,177,91
+.byte  106,203,190,57,74,76,88,207
+.byte  208,239,170,251,67,77,51,133
+.byte  69,249,2,127,80,60,159,168
+.byte  81,163,64,143,146,157,56,245
+.byte  188,182,218,33,16,255,243,210
+.byte  205,12,19,236,95,151,68,23
+.byte  196,167,126,61,100,93,25,115
+.byte  96,129,79,220,34,42,144,136
+.byte  70,238,184,20,222,94,11,219
+.byte  224,50,58,10,73,6,36,92
+.byte  194,211,172,98,145,149,228,121
+.byte  231,200,55,109,141,213,78,169
+.byte  108,86,244,234,101,122,174,8
+.byte  186,120,37,46,28,166,180,198
+.byte  232,221,116,31,75,189,139,138
+.byte  112,62,181,102,72,3,246,14
+.byte  97,53,87,185,134,193,29,158
+.byte  225,248,152,17,105,217,142,148
+.byte  155,30,135,233,206,85,40,223
+.byte  140,161,137,13,191,230,66,104
+.byte  65,153,45,15,176,84,187,22
+.long  1,2,4,8
+.long  16,32,64,128
+.long  27,54,0,0
+.long  0,0,0,0
+.size  _x86_AES_encrypt,.-_x86_AES_encrypt
+.globl AES_encrypt
+.type  AES_encrypt,@function
+.align 16
+AES_encrypt:
+.L_AES_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%eax
+       subl    $36,%esp
+       andl    $-64,%esp
+       leal    -127(%edi),%ebx
+       subl    %esp,%ebx
+       negl    %ebx
+       andl    $960,%ebx
+       subl    %ebx,%esp
+       addl    $4,%esp
+       movl    %eax,28(%esp)
+       call    .L004pic_point
+.L004pic_point:
+       popl    %ebp
+       leal    OPENSSL_ia32cap_P,%eax
+       leal    .LAES_Te-.L004pic_point(%ebp),%ebp
+       leal    764(%esp),%ebx
+       subl    %ebp,%ebx
+       andl    $768,%ebx
+       leal    2176(%ebp,%ebx,1),%ebp
+       btl     $25,(%eax)
+       jnc     .L005x86
+       movq    (%esi),%mm0
+       movq    8(%esi),%mm4
+       call    _sse_AES_encrypt_compact
+       movl    28(%esp),%esp
+       movl    24(%esp),%esi
+       movq    %mm0,(%esi)
+       movq    %mm4,8(%esi)
+       emms
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.align 16
+.L005x86:
+       movl    %ebp,24(%esp)
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       call    _x86_AES_encrypt_compact
+       movl    28(%esp),%esp
+       movl    24(%esp),%esi
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  AES_encrypt,.-.L_AES_encrypt_begin
+.type  _x86_AES_decrypt_compact,@function
+.align 16
+_x86_AES_decrypt_compact:
+       movl    %edi,20(%esp)
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+       movl    -128(%ebp),%edi
+       movl    -96(%ebp),%esi
+       movl    -64(%ebp),%edi
+       movl    -32(%ebp),%esi
+       movl    (%ebp),%edi
+       movl    32(%ebp),%esi
+       movl    64(%ebp),%edi
+       movl    96(%ebp),%esi
+.align 16
+.L006loop:
+       movl    %eax,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %dh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ebx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+       movl    %ebx,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %ah,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+       movl    %ecx,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       andl    $255,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       movzbl  %ch,%ecx
+       movzbl  -128(%ebp,%ecx,1),%ecx
+       shll    $8,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+       shrl    $16,%ebx
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%edx
+       shrl    $24,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $24,%eax
+       xorl    %eax,%edx
+       movl    %ecx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ecx,%ecx,1),%eax
+       subl    %edi,%esi
+       andl    $4278124286,%eax
+       andl    $454761243,%esi
+       xorl    %eax,%esi
+       movl    %esi,%eax
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%eax,%eax,1),%ebx
+       subl    %edi,%esi
+       andl    $4278124286,%ebx
+       andl    $454761243,%esi
+       xorl    %ecx,%eax
+       xorl    %ebx,%esi
+       movl    %esi,%ebx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ebx,%ebx,1),%ebp
+       subl    %edi,%esi
+       andl    $4278124286,%ebp
+       andl    $454761243,%esi
+       xorl    %ecx,%ebx
+       roll    $8,%ecx
+       xorl    %esi,%ebp
+       xorl    %eax,%ecx
+       xorl    %ebp,%eax
+       roll    $24,%eax
+       xorl    %ebx,%ecx
+       xorl    %ebp,%ebx
+       roll    $16,%ebx
+       xorl    %ebp,%ecx
+       roll    $8,%ebp
+       xorl    %eax,%ecx
+       xorl    %ebx,%ecx
+       movl    4(%esp),%eax
+       xorl    %ebp,%ecx
+       movl    %ecx,12(%esp)
+       movl    %edx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%edx,%edx,1),%ebx
+       subl    %edi,%esi
+       andl    $4278124286,%ebx
+       andl    $454761243,%esi
+       xorl    %ebx,%esi
+       movl    %esi,%ebx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ebx,%ebx,1),%ecx
+       subl    %edi,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       xorl    %edx,%ebx
+       xorl    %ecx,%esi
+       movl    %esi,%ecx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ecx,%ecx,1),%ebp
+       subl    %edi,%esi
+       andl    $4278124286,%ebp
+       andl    $454761243,%esi
+       xorl    %edx,%ecx
+       roll    $8,%edx
+       xorl    %esi,%ebp
+       xorl    %ebx,%edx
+       xorl    %ebp,%ebx
+       roll    $24,%ebx
+       xorl    %ecx,%edx
+       xorl    %ebp,%ecx
+       roll    $16,%ecx
+       xorl    %ebp,%edx
+       roll    $8,%ebp
+       xorl    %ebx,%edx
+       xorl    %ecx,%edx
+       movl    8(%esp),%ebx
+       xorl    %ebp,%edx
+       movl    %edx,16(%esp)
+       movl    %eax,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%eax,%eax,1),%ecx
+       subl    %edi,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       xorl    %ecx,%esi
+       movl    %esi,%ecx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ecx,%ecx,1),%edx
+       subl    %edi,%esi
+       andl    $4278124286,%edx
+       andl    $454761243,%esi
+       xorl    %eax,%ecx
+       xorl    %edx,%esi
+       movl    %esi,%edx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%edx,%edx,1),%ebp
+       subl    %edi,%esi
+       andl    $4278124286,%ebp
+       andl    $454761243,%esi
+       xorl    %eax,%edx
+       roll    $8,%eax
+       xorl    %esi,%ebp
+       xorl    %ecx,%eax
+       xorl    %ebp,%ecx
+       roll    $24,%ecx
+       xorl    %edx,%eax
+       xorl    %ebp,%edx
+       roll    $16,%edx
+       xorl    %ebp,%eax
+       roll    $8,%ebp
+       xorl    %ecx,%eax
+       xorl    %edx,%eax
+       xorl    %ebp,%eax
+       movl    %ebx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ebx,%ebx,1),%ecx
+       subl    %edi,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       xorl    %ecx,%esi
+       movl    %esi,%ecx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%ecx,%ecx,1),%edx
+       subl    %edi,%esi
+       andl    $4278124286,%edx
+       andl    $454761243,%esi
+       xorl    %ebx,%ecx
+       xorl    %edx,%esi
+       movl    %esi,%edx
+       andl    $2155905152,%esi
+       movl    %esi,%edi
+       shrl    $7,%edi
+       leal    (%edx,%edx,1),%ebp
+       subl    %edi,%esi
+       andl    $4278124286,%ebp
+       andl    $454761243,%esi
+       xorl    %ebx,%edx
+       roll    $8,%ebx
+       xorl    %esi,%ebp
+       xorl    %ecx,%ebx
+       xorl    %ebp,%ecx
+       roll    $24,%ecx
+       xorl    %edx,%ebx
+       xorl    %ebp,%edx
+       roll    $16,%edx
+       xorl    %ebp,%ebx
+       roll    $8,%ebp
+       xorl    %ecx,%ebx
+       xorl    %edx,%ebx
+       movl    12(%esp),%ecx
+       xorl    %ebp,%ebx
+       movl    16(%esp),%edx
+       movl    20(%esp),%edi
+       movl    28(%esp),%ebp
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       cmpl    24(%esp),%edi
+       movl    %edi,20(%esp)
+       jb      .L006loop
+       movl    %eax,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %dh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ebx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+       movl    %ebx,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %ah,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+       movl    %ecx,%esi
+       andl    $255,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       movzbl  %bh,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movzbl  -128(%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    20(%esp),%edi
+       andl    $255,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       movzbl  %ch,%ecx
+       movzbl  -128(%ebp,%ecx,1),%ecx
+       shll    $8,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+       shrl    $16,%ebx
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%edx
+       movl    8(%esp),%ebx
+       shrl    $24,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $24,%eax
+       xorl    %eax,%edx
+       movl    4(%esp),%eax
+       xorl    16(%edi),%eax
+       xorl    20(%edi),%ebx
+       xorl    24(%edi),%ecx
+       xorl    28(%edi),%edx
+       ret
+.size  _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact
+.type  _sse_AES_decrypt_compact,@function
+.align 16
+_sse_AES_decrypt_compact:
+       pxor    (%edi),%mm0
+       pxor    8(%edi),%mm4
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+       movl    $454761243,%eax
+       movl    %eax,8(%esp)
+       movl    %eax,12(%esp)
+       movl    -128(%ebp),%eax
+       movl    -96(%ebp),%ebx
+       movl    -64(%ebp),%ecx
+       movl    -32(%ebp),%edx
+       movl    (%ebp),%eax
+       movl    32(%ebp),%ebx
+       movl    64(%ebp),%ecx
+       movl    96(%ebp),%edx
+.align 16
+.L007loop:
+       pshufw  $12,%mm0,%mm1
+       movd    %mm1,%eax
+       pshufw  $9,%mm4,%mm5
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%ecx
+       movd    %mm5,%ebx
+       movzbl  %ah,%edx
+       movzbl  -128(%ebp,%edx,1),%edx
+       shll    $8,%edx
+       pshufw  $6,%mm0,%mm2
+       movzbl  %bl,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%ecx
+       shrl    $16,%eax
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%edx
+       shrl    $16,%ebx
+       pshufw  $3,%mm4,%mm6
+       movzbl  %ah,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%ecx
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $8,%esi
+       orl     %esi,%ecx
+       movd    %ecx,%mm0
+       movzbl  %al,%esi
+       movd    %mm2,%eax
+       movzbl  -128(%ebp,%esi,1),%ecx
+       shll    $16,%ecx
+       movzbl  %bl,%esi
+       movd    %mm6,%ebx
+       movzbl  -128(%ebp,%esi,1),%esi
+       orl     %esi,%ecx
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       orl     %esi,%edx
+       movzbl  %bl,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%edx
+       movd    %edx,%mm1
+       movzbl  %ah,%esi
+       movzbl  -128(%ebp,%esi,1),%edx
+       shll    $8,%edx
+       movzbl  %bh,%esi
+       shrl    $16,%eax
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $24,%esi
+       orl     %esi,%edx
+       shrl    $16,%ebx
+       punpckldq       %mm1,%mm0
+       movzbl  %bh,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $8,%esi
+       orl     %esi,%ecx
+       andl    $255,%ebx
+       movzbl  -128(%ebp,%ebx,1),%ebx
+       orl     %ebx,%edx
+       movzbl  %al,%esi
+       movzbl  -128(%ebp,%esi,1),%esi
+       shll    $16,%esi
+       orl     %esi,%edx
+       movd    %edx,%mm4
+       movzbl  %ah,%eax
+       movzbl  -128(%ebp,%eax,1),%eax
+       shll    $24,%eax
+       orl     %eax,%ecx
+       movd    %ecx,%mm5
+       punpckldq       %mm5,%mm4
+       addl    $16,%edi
+       cmpl    24(%esp),%edi
+       ja      .L008out
+       movq    %mm0,%mm3
+       movq    %mm4,%mm7
+       pshufw  $228,%mm0,%mm2
+       pshufw  $228,%mm4,%mm6
+       movq    %mm0,%mm1
+       movq    %mm4,%mm5
+       pshufw  $177,%mm0,%mm0
+       pshufw  $177,%mm4,%mm4
+       pslld   $8,%mm2
+       pslld   $8,%mm6
+       psrld   $8,%mm3
+       psrld   $8,%mm7
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pslld   $16,%mm2
+       pslld   $16,%mm6
+       psrld   $16,%mm3
+       psrld   $16,%mm7
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       movq    8(%esp),%mm3
+       pxor    %mm2,%mm2
+       pxor    %mm6,%mm6
+       pcmpgtb %mm1,%mm2
+       pcmpgtb %mm5,%mm6
+       pand    %mm3,%mm2
+       pand    %mm3,%mm6
+       paddb   %mm1,%mm1
+       paddb   %mm5,%mm5
+       pxor    %mm2,%mm1
+       pxor    %mm6,%mm5
+       movq    %mm1,%mm3
+       movq    %mm5,%mm7
+       movq    %mm1,%mm2
+       movq    %mm5,%mm6
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       pslld   $24,%mm3
+       pslld   $24,%mm7
+       psrld   $8,%mm2
+       psrld   $8,%mm6
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       movq    8(%esp),%mm2
+       pxor    %mm3,%mm3
+       pxor    %mm7,%mm7
+       pcmpgtb %mm1,%mm3
+       pcmpgtb %mm5,%mm7
+       pand    %mm2,%mm3
+       pand    %mm2,%mm7
+       paddb   %mm1,%mm1
+       paddb   %mm5,%mm5
+       pxor    %mm3,%mm1
+       pxor    %mm7,%mm5
+       pshufw  $177,%mm1,%mm3
+       pshufw  $177,%mm5,%mm7
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pxor    %mm3,%mm3
+       pxor    %mm7,%mm7
+       pcmpgtb %mm1,%mm3
+       pcmpgtb %mm5,%mm7
+       pand    %mm2,%mm3
+       pand    %mm2,%mm7
+       paddb   %mm1,%mm1
+       paddb   %mm5,%mm5
+       pxor    %mm3,%mm1
+       pxor    %mm7,%mm5
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       movq    %mm1,%mm3
+       movq    %mm5,%mm7
+       pshufw  $177,%mm1,%mm2
+       pshufw  $177,%mm5,%mm6
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       pslld   $8,%mm1
+       pslld   $8,%mm5
+       psrld   $8,%mm3
+       psrld   $8,%mm7
+       movq    (%edi),%mm2
+       movq    8(%edi),%mm6
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       movl    -128(%ebp),%eax
+       pslld   $16,%mm1
+       pslld   $16,%mm5
+       movl    -64(%ebp),%ebx
+       psrld   $16,%mm3
+       psrld   $16,%mm7
+       movl    (%ebp),%ecx
+       pxor    %mm1,%mm0
+       pxor    %mm5,%mm4
+       movl    64(%ebp),%edx
+       pxor    %mm3,%mm0
+       pxor    %mm7,%mm4
+       pxor    %mm2,%mm0
+       pxor    %mm6,%mm4
+       jmp     .L007loop
+.align 16
+.L008out:
+       pxor    (%edi),%mm0
+       pxor    8(%edi),%mm4
+       ret
+.size  _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact
+.type  _x86_AES_decrypt,@function
+.align 16
+_x86_AES_decrypt:
+       movl    %edi,20(%esp)
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,24(%esp)
+.align 16
+.L009loop:
+       movl    %eax,%esi
+       andl    $255,%esi
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %dh,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movl    %ebx,%edi
+       shrl    $24,%edi
+       xorl    1(%ebp,%edi,8),%esi
+       movl    %esi,4(%esp)
+
+       movl    %ebx,%esi
+       andl    $255,%esi
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %ah,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movl    %ecx,%edi
+       shrl    $24,%edi
+       xorl    1(%ebp,%edi,8),%esi
+       movl    %esi,8(%esp)
+
+       movl    %ecx,%esi
+       andl    $255,%esi
+       movl    (%ebp,%esi,8),%esi
+       movzbl  %bh,%edi
+       xorl    3(%ebp,%edi,8),%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       xorl    2(%ebp,%edi,8),%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       xorl    1(%ebp,%edi,8),%esi
+
+       movl    20(%esp),%edi
+       andl    $255,%edx
+       movl    (%ebp,%edx,8),%edx
+       movzbl  %ch,%ecx
+       xorl    3(%ebp,%ecx,8),%edx
+       movl    %esi,%ecx
+       shrl    $16,%ebx
+       andl    $255,%ebx
+       xorl    2(%ebp,%ebx,8),%edx
+       movl    8(%esp),%ebx
+       shrl    $24,%eax
+       xorl    1(%ebp,%eax,8),%edx
+       movl    4(%esp),%eax
+
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       cmpl    24(%esp),%edi
+       movl    %edi,20(%esp)
+       jb      .L009loop
+       leal    2176(%ebp),%ebp
+       movl    -128(%ebp),%edi
+       movl    -96(%ebp),%esi
+       movl    -64(%ebp),%edi
+       movl    -32(%ebp),%esi
+       movl    (%ebp),%edi
+       movl    32(%ebp),%esi
+       movl    64(%ebp),%edi
+       movl    96(%ebp),%esi
+       leal    -128(%ebp),%ebp
+       movl    %eax,%esi
+       andl    $255,%esi
+       movzbl  (%ebp,%esi,1),%esi
+       movzbl  %dh,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ebx,%edi
+       shrl    $24,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,4(%esp)
+       movl    %ebx,%esi
+       andl    $255,%esi
+       movzbl  (%ebp,%esi,1),%esi
+       movzbl  %ah,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %ecx,%edi
+       shrl    $24,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    %esi,8(%esp)
+       movl    %ecx,%esi
+       andl    $255,%esi
+       movzbl  (%ebp,%esi,1),%esi
+       movzbl  %bh,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $8,%edi
+       xorl    %edi,%esi
+       movl    %eax,%edi
+       shrl    $16,%edi
+       andl    $255,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $16,%edi
+       xorl    %edi,%esi
+       movl    %edx,%edi
+       shrl    $24,%edi
+       movzbl  (%ebp,%edi,1),%edi
+       shll    $24,%edi
+       xorl    %edi,%esi
+       movl    20(%esp),%edi
+       andl    $255,%edx
+       movzbl  (%ebp,%edx,1),%edx
+       movzbl  %ch,%ecx
+       movzbl  (%ebp,%ecx,1),%ecx
+       shll    $8,%ecx
+       xorl    %ecx,%edx
+       movl    %esi,%ecx
+       shrl    $16,%ebx
+       andl    $255,%ebx
+       movzbl  (%ebp,%ebx,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%edx
+       movl    8(%esp),%ebx
+       shrl    $24,%eax
+       movzbl  (%ebp,%eax,1),%eax
+       shll    $24,%eax
+       xorl    %eax,%edx
+       movl    4(%esp),%eax
+       leal    -2048(%ebp),%ebp
+       addl    $16,%edi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       ret
+.align 64
+.LAES_Td:
+.long  1353184337,1353184337
+.long  1399144830,1399144830
+.long  3282310938,3282310938
+.long  2522752826,2522752826
+.long  3412831035,3412831035
+.long  4047871263,4047871263
+.long  2874735276,2874735276
+.long  2466505547,2466505547
+.long  1442459680,1442459680
+.long  4134368941,4134368941
+.long  2440481928,2440481928
+.long  625738485,625738485
+.long  4242007375,4242007375
+.long  3620416197,3620416197
+.long  2151953702,2151953702
+.long  2409849525,2409849525
+.long  1230680542,1230680542
+.long  1729870373,1729870373
+.long  2551114309,2551114309
+.long  3787521629,3787521629
+.long  41234371,41234371
+.long  317738113,317738113
+.long  2744600205,2744600205
+.long  3338261355,3338261355
+.long  3881799427,3881799427
+.long  2510066197,2510066197
+.long  3950669247,3950669247
+.long  3663286933,3663286933
+.long  763608788,763608788
+.long  3542185048,3542185048
+.long  694804553,694804553
+.long  1154009486,1154009486
+.long  1787413109,1787413109
+.long  2021232372,2021232372
+.long  1799248025,1799248025
+.long  3715217703,3715217703
+.long  3058688446,3058688446
+.long  397248752,397248752
+.long  1722556617,1722556617
+.long  3023752829,3023752829
+.long  407560035,407560035
+.long  2184256229,2184256229
+.long  1613975959,1613975959
+.long  1165972322,1165972322
+.long  3765920945,3765920945
+.long  2226023355,2226023355
+.long  480281086,480281086
+.long  2485848313,2485848313
+.long  1483229296,1483229296
+.long  436028815,436028815
+.long  2272059028,2272059028
+.long  3086515026,3086515026
+.long  601060267,601060267
+.long  3791801202,3791801202
+.long  1468997603,1468997603
+.long  715871590,715871590
+.long  120122290,120122290
+.long  63092015,63092015
+.long  2591802758,2591802758
+.long  2768779219,2768779219
+.long  4068943920,4068943920
+.long  2997206819,2997206819
+.long  3127509762,3127509762
+.long  1552029421,1552029421
+.long  723308426,723308426
+.long  2461301159,2461301159
+.long  4042393587,4042393587
+.long  2715969870,2715969870
+.long  3455375973,3455375973
+.long  3586000134,3586000134
+.long  526529745,526529745
+.long  2331944644,2331944644
+.long  2639474228,2639474228
+.long  2689987490,2689987490
+.long  853641733,853641733
+.long  1978398372,1978398372
+.long  971801355,971801355
+.long  2867814464,2867814464
+.long  111112542,111112542
+.long  1360031421,1360031421
+.long  4186579262,4186579262
+.long  1023860118,1023860118
+.long  2919579357,2919579357
+.long  1186850381,1186850381
+.long  3045938321,3045938321
+.long  90031217,90031217
+.long  1876166148,1876166148
+.long  4279586912,4279586912
+.long  620468249,620468249
+.long  2548678102,2548678102
+.long  3426959497,3426959497
+.long  2006899047,2006899047
+.long  3175278768,3175278768
+.long  2290845959,2290845959
+.long  945494503,945494503
+.long  3689859193,3689859193
+.long  1191869601,1191869601
+.long  3910091388,3910091388
+.long  3374220536,3374220536
+.long  0,0
+.long  2206629897,2206629897
+.long  1223502642,1223502642
+.long  2893025566,2893025566
+.long  1316117100,1316117100
+.long  4227796733,4227796733
+.long  1446544655,1446544655
+.long  517320253,517320253
+.long  658058550,658058550
+.long  1691946762,1691946762
+.long  564550760,564550760
+.long  3511966619,3511966619
+.long  976107044,976107044
+.long  2976320012,2976320012
+.long  266819475,266819475
+.long  3533106868,3533106868
+.long  2660342555,2660342555
+.long  1338359936,1338359936
+.long  2720062561,2720062561
+.long  1766553434,1766553434
+.long  370807324,370807324
+.long  179999714,179999714
+.long  3844776128,3844776128
+.long  1138762300,1138762300
+.long  488053522,488053522
+.long  185403662,185403662
+.long  2915535858,2915535858
+.long  3114841645,3114841645
+.long  3366526484,3366526484
+.long  2233069911,2233069911
+.long  1275557295,1275557295
+.long  3151862254,3151862254
+.long  4250959779,4250959779
+.long  2670068215,2670068215
+.long  3170202204,3170202204
+.long  3309004356,3309004356
+.long  880737115,880737115
+.long  1982415755,1982415755
+.long  3703972811,3703972811
+.long  1761406390,1761406390
+.long  1676797112,1676797112
+.long  3403428311,3403428311
+.long  277177154,277177154
+.long  1076008723,1076008723
+.long  538035844,538035844
+.long  2099530373,2099530373
+.long  4164795346,4164795346
+.long  288553390,288553390
+.long  1839278535,1839278535
+.long  1261411869,1261411869
+.long  4080055004,4080055004
+.long  3964831245,3964831245
+.long  3504587127,3504587127
+.long  1813426987,1813426987
+.long  2579067049,2579067049
+.long  4199060497,4199060497
+.long  577038663,577038663
+.long  3297574056,3297574056
+.long  440397984,440397984
+.long  3626794326,3626794326
+.long  4019204898,4019204898
+.long  3343796615,3343796615
+.long  3251714265,3251714265
+.long  4272081548,4272081548
+.long  906744984,906744984
+.long  3481400742,3481400742
+.long  685669029,685669029
+.long  646887386,646887386
+.long  2764025151,2764025151
+.long  3835509292,3835509292
+.long  227702864,227702864
+.long  2613862250,2613862250
+.long  1648787028,1648787028
+.long  3256061430,3256061430
+.long  3904428176,3904428176
+.long  1593260334,1593260334
+.long  4121936770,4121936770
+.long  3196083615,3196083615
+.long  2090061929,2090061929
+.long  2838353263,2838353263
+.long  3004310991,3004310991
+.long  999926984,999926984
+.long  2809993232,2809993232
+.long  1852021992,1852021992
+.long  2075868123,2075868123
+.long  158869197,158869197
+.long  4095236462,4095236462
+.long  28809964,28809964
+.long  2828685187,2828685187
+.long  1701746150,1701746150
+.long  2129067946,2129067946
+.long  147831841,147831841
+.long  3873969647,3873969647
+.long  3650873274,3650873274
+.long  3459673930,3459673930
+.long  3557400554,3557400554
+.long  3598495785,3598495785
+.long  2947720241,2947720241
+.long  824393514,824393514
+.long  815048134,815048134
+.long  3227951669,3227951669
+.long  935087732,935087732
+.long  2798289660,2798289660
+.long  2966458592,2966458592
+.long  366520115,366520115
+.long  1251476721,1251476721
+.long  4158319681,4158319681
+.long  240176511,240176511
+.long  804688151,804688151
+.long  2379631990,2379631990
+.long  1303441219,1303441219
+.long  1414376140,1414376140
+.long  3741619940,3741619940
+.long  3820343710,3820343710
+.long  461924940,461924940
+.long  3089050817,3089050817
+.long  2136040774,2136040774
+.long  82468509,82468509
+.long  1563790337,1563790337
+.long  1937016826,1937016826
+.long  776014843,776014843
+.long  1511876531,1511876531
+.long  1389550482,1389550482
+.long  861278441,861278441
+.long  323475053,323475053
+.long  2355222426,2355222426
+.long  2047648055,2047648055
+.long  2383738969,2383738969
+.long  2302415851,2302415851
+.long  3995576782,3995576782
+.long  902390199,902390199
+.long  3991215329,3991215329
+.long  1018251130,1018251130
+.long  1507840668,1507840668
+.long  1064563285,1064563285
+.long  2043548696,2043548696
+.long  3208103795,3208103795
+.long  3939366739,3939366739
+.long  1537932639,1537932639
+.long  342834655,342834655
+.long  2262516856,2262516856
+.long  2180231114,2180231114
+.long  1053059257,1053059257
+.long  741614648,741614648
+.long  1598071746,1598071746
+.long  1925389590,1925389590
+.long  203809468,203809468
+.long  2336832552,2336832552
+.long  1100287487,1100287487
+.long  1895934009,1895934009
+.long  3736275976,3736275976
+.long  2632234200,2632234200
+.long  2428589668,2428589668
+.long  1636092795,1636092795
+.long  1890988757,1890988757
+.long  1952214088,1952214088
+.long  1113045200,1113045200
+.byte  82,9,106,213,48,54,165,56
+.byte  191,64,163,158,129,243,215,251
+.byte  124,227,57,130,155,47,255,135
+.byte  52,142,67,68,196,222,233,203
+.byte  84,123,148,50,166,194,35,61
+.byte  238,76,149,11,66,250,195,78
+.byte  8,46,161,102,40,217,36,178
+.byte  118,91,162,73,109,139,209,37
+.byte  114,248,246,100,134,104,152,22
+.byte  212,164,92,204,93,101,182,146
+.byte  108,112,72,80,253,237,185,218
+.byte  94,21,70,87,167,141,157,132
+.byte  144,216,171,0,140,188,211,10
+.byte  247,228,88,5,184,179,69,6
+.byte  208,44,30,143,202,63,15,2
+.byte  193,175,189,3,1,19,138,107
+.byte  58,145,17,65,79,103,220,234
+.byte  151,242,207,206,240,180,230,115
+.byte  150,172,116,34,231,173,53,133
+.byte  226,249,55,232,28,117,223,110
+.byte  71,241,26,113,29,41,197,137
+.byte  111,183,98,14,170,24,190,27
+.byte  252,86,62,75,198,210,121,32
+.byte  154,219,192,254,120,205,90,244
+.byte  31,221,168,51,136,7,199,49
+.byte  177,18,16,89,39,128,236,95
+.byte  96,81,127,169,25,181,74,13
+.byte  45,229,122,159,147,201,156,239
+.byte  160,224,59,77,174,42,245,176
+.byte  200,235,187,60,131,83,153,97
+.byte  23,43,4,126,186,119,214,38
+.byte  225,105,20,99,85,33,12,125
+.byte  82,9,106,213,48,54,165,56
+.byte  191,64,163,158,129,243,215,251
+.byte  124,227,57,130,155,47,255,135
+.byte  52,142,67,68,196,222,233,203
+.byte  84,123,148,50,166,194,35,61
+.byte  238,76,149,11,66,250,195,78
+.byte  8,46,161,102,40,217,36,178
+.byte  118,91,162,73,109,139,209,37
+.byte  114,248,246,100,134,104,152,22
+.byte  212,164,92,204,93,101,182,146
+.byte  108,112,72,80,253,237,185,218
+.byte  94,21,70,87,167,141,157,132
+.byte  144,216,171,0,140,188,211,10
+.byte  247,228,88,5,184,179,69,6
+.byte  208,44,30,143,202,63,15,2
+.byte  193,175,189,3,1,19,138,107
+.byte  58,145,17,65,79,103,220,234
+.byte  151,242,207,206,240,180,230,115
+.byte  150,172,116,34,231,173,53,133
+.byte  226,249,55,232,28,117,223,110
+.byte  71,241,26,113,29,41,197,137
+.byte  111,183,98,14,170,24,190,27
+.byte  252,86,62,75,198,210,121,32
+.byte  154,219,192,254,120,205,90,244
+.byte  31,221,168,51,136,7,199,49
+.byte  177,18,16,89,39,128,236,95
+.byte  96,81,127,169,25,181,74,13
+.byte  45,229,122,159,147,201,156,239
+.byte  160,224,59,77,174,42,245,176
+.byte  200,235,187,60,131,83,153,97
+.byte  23,43,4,126,186,119,214,38
+.byte  225,105,20,99,85,33,12,125
+.byte  82,9,106,213,48,54,165,56
+.byte  191,64,163,158,129,243,215,251
+.byte  124,227,57,130,155,47,255,135
+.byte  52,142,67,68,196,222,233,203
+.byte  84,123,148,50,166,194,35,61
+.byte  238,76,149,11,66,250,195,78
+.byte  8,46,161,102,40,217,36,178
+.byte  118,91,162,73,109,139,209,37
+.byte  114,248,246,100,134,104,152,22
+.byte  212,164,92,204,93,101,182,146
+.byte  108,112,72,80,253,237,185,218
+.byte  94,21,70,87,167,141,157,132
+.byte  144,216,171,0,140,188,211,10
+.byte  247,228,88,5,184,179,69,6
+.byte  208,44,30,143,202,63,15,2
+.byte  193,175,189,3,1,19,138,107
+.byte  58,145,17,65,79,103,220,234
+.byte  151,242,207,206,240,180,230,115
+.byte  150,172,116,34,231,173,53,133
+.byte  226,249,55,232,28,117,223,110
+.byte  71,241,26,113,29,41,197,137
+.byte  111,183,98,14,170,24,190,27
+.byte  252,86,62,75,198,210,121,32
+.byte  154,219,192,254,120,205,90,244
+.byte  31,221,168,51,136,7,199,49
+.byte  177,18,16,89,39,128,236,95
+.byte  96,81,127,169,25,181,74,13
+.byte  45,229,122,159,147,201,156,239
+.byte  160,224,59,77,174,42,245,176
+.byte  200,235,187,60,131,83,153,97
+.byte  23,43,4,126,186,119,214,38
+.byte  225,105,20,99,85,33,12,125
+.byte  82,9,106,213,48,54,165,56
+.byte  191,64,163,158,129,243,215,251
+.byte  124,227,57,130,155,47,255,135
+.byte  52,142,67,68,196,222,233,203
+.byte  84,123,148,50,166,194,35,61
+.byte  238,76,149,11,66,250,195,78
+.byte  8,46,161,102,40,217,36,178
+.byte  118,91,162,73,109,139,209,37
+.byte  114,248,246,100,134,104,152,22
+.byte  212,164,92,204,93,101,182,146
+.byte  108,112,72,80,253,237,185,218
+.byte  94,21,70,87,167,141,157,132
+.byte  144,216,171,0,140,188,211,10
+.byte  247,228,88,5,184,179,69,6
+.byte  208,44,30,143,202,63,15,2
+.byte  193,175,189,3,1,19,138,107
+.byte  58,145,17,65,79,103,220,234
+.byte  151,242,207,206,240,180,230,115
+.byte  150,172,116,34,231,173,53,133
+.byte  226,249,55,232,28,117,223,110
+.byte  71,241,26,113,29,41,197,137
+.byte  111,183,98,14,170,24,190,27
+.byte  252,86,62,75,198,210,121,32
+.byte  154,219,192,254,120,205,90,244
+.byte  31,221,168,51,136,7,199,49
+.byte  177,18,16,89,39,128,236,95
+.byte  96,81,127,169,25,181,74,13
+.byte  45,229,122,159,147,201,156,239
+.byte  160,224,59,77,174,42,245,176
+.byte  200,235,187,60,131,83,153,97
+.byte  23,43,4,126,186,119,214,38
+.byte  225,105,20,99,85,33,12,125
+.size  _x86_AES_decrypt,.-_x86_AES_decrypt
+.globl AES_decrypt
+.type  AES_decrypt,@function
+.align 16
+AES_decrypt:
+.L_AES_decrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%eax
+       subl    $36,%esp
+       andl    $-64,%esp
+       leal    -127(%edi),%ebx
+       subl    %esp,%ebx
+       negl    %ebx
+       andl    $960,%ebx
+       subl    %ebx,%esp
+       addl    $4,%esp
+       movl    %eax,28(%esp)
+       call    .L010pic_point
+.L010pic_point:
+       popl    %ebp
+       leal    OPENSSL_ia32cap_P,%eax
+       leal    .LAES_Td-.L010pic_point(%ebp),%ebp
+       leal    764(%esp),%ebx
+       subl    %ebp,%ebx
+       andl    $768,%ebx
+       leal    2176(%ebp,%ebx,1),%ebp
+       btl     $25,(%eax)
+       jnc     .L011x86
+       movq    (%esi),%mm0
+       movq    8(%esi),%mm4
+       call    _sse_AES_decrypt_compact
+       movl    28(%esp),%esp
+       movl    24(%esp),%esi
+       movq    %mm0,(%esi)
+       movq    %mm4,8(%esi)
+       emms
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.align 16
+.L011x86:
+       movl    %ebp,24(%esp)
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       call    _x86_AES_decrypt_compact
+       movl    28(%esp),%esp
+       movl    24(%esp),%esi
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  AES_decrypt,.-.L_AES_decrypt_begin
+.globl AES_cbc_encrypt
+.type  AES_cbc_encrypt,@function
+.align 16
+AES_cbc_encrypt:
+.L_AES_cbc_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    28(%esp),%ecx
+       cmpl    $0,%ecx
+       je      .L012drop_out
+       call    .L013pic_point
+.L013pic_point:
+       popl    %ebp
+       leal    OPENSSL_ia32cap_P,%eax
+       cmpl    $0,40(%esp)
+       leal    .LAES_Te-.L013pic_point(%ebp),%ebp
+       jne     .L014picked_te
+       leal    .LAES_Td-.LAES_Te(%ebp),%ebp
+.L014picked_te:
+       pushfl
+       cld
+       cmpl    $512,%ecx
+       jb      .L015slow_way
+       testl   $15,%ecx
+       jnz     .L015slow_way
+       btl     $28,(%eax)
+       jc      .L015slow_way
+       leal    -324(%esp),%esi
+       andl    $-64,%esi
+       movl    %ebp,%eax
+       leal    2304(%ebp),%ebx
+       movl    %esi,%edx
+       andl    $4095,%eax
+       andl    $4095,%ebx
+       andl    $4095,%edx
+       cmpl    %ebx,%edx
+       jb      .L016tbl_break_out
+       subl    %ebx,%edx
+       subl    %edx,%esi
+       jmp     .L017tbl_ok
+.align 4
+.L016tbl_break_out:
+       subl    %eax,%edx
+       andl    $4095,%edx
+       addl    $384,%edx
+       subl    %edx,%esi
+.align 4
+.L017tbl_ok:
+       leal    24(%esp),%edx
+       xchgl   %esi,%esp
+       addl    $4,%esp
+       movl    %ebp,24(%esp)
+       movl    %esi,28(%esp)
+       movl    (%edx),%eax
+       movl    4(%edx),%ebx
+       movl    12(%edx),%edi
+       movl    16(%edx),%esi
+       movl    20(%edx),%edx
+       movl    %eax,32(%esp)
+       movl    %ebx,36(%esp)
+       movl    %ecx,40(%esp)
+       movl    %edi,44(%esp)
+       movl    %esi,48(%esp)
+       movl    $0,316(%esp)
+       movl    %edi,%ebx
+       movl    $61,%ecx
+       subl    %ebp,%ebx
+       movl    %edi,%esi
+       andl    $4095,%ebx
+       leal    76(%esp),%edi
+       cmpl    $2304,%ebx
+       jb      .L018do_copy
+       cmpl    $3852,%ebx
+       jb      .L019skip_copy
+.align 4
+.L018do_copy:
+       movl    %edi,44(%esp)
+.long  2784229001
+.L019skip_copy:
+       movl    $16,%edi
+.align 4
+.L020prefetch_tbl:
+       movl    (%ebp),%eax
+       movl    32(%ebp),%ebx
+       movl    64(%ebp),%ecx
+       movl    96(%ebp),%esi
+       leal    128(%ebp),%ebp
+       subl    $1,%edi
+       jnz     .L020prefetch_tbl
+       subl    $2048,%ebp
+       movl    32(%esp),%esi
+       movl    48(%esp),%edi
+       cmpl    $0,%edx
+       je      .L021fast_decrypt
+       movl    (%edi),%eax
+       movl    4(%edi),%ebx
+.align 16
+.L022fast_enc_loop:
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       xorl    (%esi),%eax
+       xorl    4(%esi),%ebx
+       xorl    8(%esi),%ecx
+       xorl    12(%esi),%edx
+       movl    44(%esp),%edi
+       call    _x86_AES_encrypt
+       movl    32(%esp),%esi
+       movl    36(%esp),%edi
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       leal    16(%esi),%esi
+       movl    40(%esp),%ecx
+       movl    %esi,32(%esp)
+       leal    16(%edi),%edx
+       movl    %edx,36(%esp)
+       subl    $16,%ecx
+       movl    %ecx,40(%esp)
+       jnz     .L022fast_enc_loop
+       movl    48(%esp),%esi
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       cmpl    $0,316(%esp)
+       movl    44(%esp),%edi
+       je      .L023skip_ezero
+       movl    $60,%ecx
+       xorl    %eax,%eax
+.align 4
+.long  2884892297
+.L023skip_ezero:
+       movl    28(%esp),%esp
+       popfl
+.L012drop_out:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L021fast_decrypt:
+       cmpl    36(%esp),%esi
+       je      .L024fast_dec_in_place
+       movl    %edi,52(%esp)
+.align 4
+.align 16
+.L025fast_dec_loop:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    44(%esp),%edi
+       call    _x86_AES_decrypt
+       movl    52(%esp),%edi
+       movl    40(%esp),%esi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    36(%esp),%edi
+       movl    32(%esp),%esi
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    40(%esp),%ecx
+       movl    %esi,52(%esp)
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       leal    16(%edi),%edi
+       movl    %edi,36(%esp)
+       subl    $16,%ecx
+       movl    %ecx,40(%esp)
+       jnz     .L025fast_dec_loop
+       movl    52(%esp),%edi
+       movl    48(%esp),%esi
+       movl    (%edi),%eax
+       movl    4(%edi),%ebx
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       jmp     .L026fast_dec_out
+.align 16
+.L024fast_dec_in_place:
+.L027fast_dec_in_place_loop:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       leal    60(%esp),%edi
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    44(%esp),%edi
+       call    _x86_AES_decrypt
+       movl    48(%esp),%edi
+       movl    36(%esp),%esi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       leal    16(%esi),%esi
+       movl    %esi,36(%esp)
+       leal    60(%esp),%esi
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    32(%esp),%esi
+       movl    40(%esp),%ecx
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       subl    $16,%ecx
+       movl    %ecx,40(%esp)
+       jnz     .L027fast_dec_in_place_loop
+.align 4
+.L026fast_dec_out:
+       cmpl    $0,316(%esp)
+       movl    44(%esp),%edi
+       je      .L028skip_dzero
+       movl    $60,%ecx
+       xorl    %eax,%eax
+.align 4
+.long  2884892297
+.L028skip_dzero:
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L015slow_way:
+       movl    (%eax),%eax
+       movl    36(%esp),%edi
+       leal    -80(%esp),%esi
+       andl    $-64,%esi
+       leal    -143(%edi),%ebx
+       subl    %esi,%ebx
+       negl    %ebx
+       andl    $960,%ebx
+       subl    %ebx,%esi
+       leal    768(%esi),%ebx
+       subl    %ebp,%ebx
+       andl    $768,%ebx
+       leal    2176(%ebp,%ebx,1),%ebp
+       leal    24(%esp),%edx
+       xchgl   %esi,%esp
+       addl    $4,%esp
+       movl    %ebp,24(%esp)
+       movl    %esi,28(%esp)
+       movl    %eax,52(%esp)
+       movl    (%edx),%eax
+       movl    4(%edx),%ebx
+       movl    16(%edx),%esi
+       movl    20(%edx),%edx
+       movl    %eax,32(%esp)
+       movl    %ebx,36(%esp)
+       movl    %ecx,40(%esp)
+       movl    %edi,44(%esp)
+       movl    %esi,48(%esp)
+       movl    %esi,%edi
+       movl    %eax,%esi
+       cmpl    $0,%edx
+       je      .L029slow_decrypt
+       cmpl    $16,%ecx
+       movl    %ebx,%edx
+       jb      .L030slow_enc_tail
+       btl     $25,52(%esp)
+       jnc     .L031slow_enc_x86
+       movq    (%edi),%mm0
+       movq    8(%edi),%mm4
+.align 16
+.L032slow_enc_loop_sse:
+       pxor    (%esi),%mm0
+       pxor    8(%esi),%mm4
+       movl    44(%esp),%edi
+       call    _sse_AES_encrypt_compact
+       movl    32(%esp),%esi
+       movl    36(%esp),%edi
+       movl    40(%esp),%ecx
+       movq    %mm0,(%edi)
+       movq    %mm4,8(%edi)
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       leal    16(%edi),%edx
+       movl    %edx,36(%esp)
+       subl    $16,%ecx
+       cmpl    $16,%ecx
+       movl    %ecx,40(%esp)
+       jae     .L032slow_enc_loop_sse
+       testl   $15,%ecx
+       jnz     .L030slow_enc_tail
+       movl    48(%esp),%esi
+       movq    %mm0,(%esi)
+       movq    %mm4,8(%esi)
+       emms
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L031slow_enc_x86:
+       movl    (%edi),%eax
+       movl    4(%edi),%ebx
+.align 4
+.L033slow_enc_loop_x86:
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       xorl    (%esi),%eax
+       xorl    4(%esi),%ebx
+       xorl    8(%esi),%ecx
+       xorl    12(%esi),%edx
+       movl    44(%esp),%edi
+       call    _x86_AES_encrypt_compact
+       movl    32(%esp),%esi
+       movl    36(%esp),%edi
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    40(%esp),%ecx
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       leal    16(%edi),%edx
+       movl    %edx,36(%esp)
+       subl    $16,%ecx
+       cmpl    $16,%ecx
+       movl    %ecx,40(%esp)
+       jae     .L033slow_enc_loop_x86
+       testl   $15,%ecx
+       jnz     .L030slow_enc_tail
+       movl    48(%esp),%esi
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L030slow_enc_tail:
+       emms
+       movl    %edx,%edi
+       movl    $16,%ebx
+       subl    %ecx,%ebx
+       cmpl    %esi,%edi
+       je      .L034enc_in_place
+.align 4
+.long  2767451785
+       jmp     .L035enc_skip_in_place
+.L034enc_in_place:
+       leal    (%edi,%ecx,1),%edi
+.L035enc_skip_in_place:
+       movl    %ebx,%ecx
+       xorl    %eax,%eax
+.align 4
+.long  2868115081
+       movl    48(%esp),%edi
+       movl    %edx,%esi
+       movl    (%edi),%eax
+       movl    4(%edi),%ebx
+       movl    $16,40(%esp)
+       jmp     .L033slow_enc_loop_x86
+.align 16
+.L029slow_decrypt:
+       btl     $25,52(%esp)
+       jnc     .L036slow_dec_loop_x86
+.align 4
+.L037slow_dec_loop_sse:
+       movq    (%esi),%mm0
+       movq    8(%esi),%mm4
+       movl    44(%esp),%edi
+       call    _sse_AES_decrypt_compact
+       movl    32(%esp),%esi
+       leal    60(%esp),%eax
+       movl    36(%esp),%ebx
+       movl    40(%esp),%ecx
+       movl    48(%esp),%edi
+       movq    (%esi),%mm1
+       movq    8(%esi),%mm5
+       pxor    (%edi),%mm0
+       pxor    8(%edi),%mm4
+       movq    %mm1,(%edi)
+       movq    %mm5,8(%edi)
+       subl    $16,%ecx
+       jc      .L038slow_dec_partial_sse
+       movq    %mm0,(%ebx)
+       movq    %mm4,8(%ebx)
+       leal    16(%ebx),%ebx
+       movl    %ebx,36(%esp)
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       movl    %ecx,40(%esp)
+       jnz     .L037slow_dec_loop_sse
+       emms
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L038slow_dec_partial_sse:
+       movq    %mm0,(%eax)
+       movq    %mm4,8(%eax)
+       emms
+       addl    $16,%ecx
+       movl    %ebx,%edi
+       movl    %eax,%esi
+.align 4
+.long  2767451785
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L036slow_dec_loop_x86:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       leal    60(%esp),%edi
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    44(%esp),%edi
+       call    _x86_AES_decrypt_compact
+       movl    48(%esp),%edi
+       movl    40(%esp),%esi
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       subl    $16,%esi
+       jc      .L039slow_dec_partial_x86
+       movl    %esi,40(%esp)
+       movl    36(%esp),%esi
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       leal    16(%esi),%esi
+       movl    %esi,36(%esp)
+       leal    60(%esp),%esi
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    32(%esp),%esi
+       leal    16(%esi),%esi
+       movl    %esi,32(%esp)
+       jnz     .L036slow_dec_loop_x86
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 16
+.L039slow_dec_partial_x86:
+       leal    60(%esp),%esi
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       movl    32(%esp),%esi
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    40(%esp),%ecx
+       movl    36(%esp),%edi
+       leal    60(%esp),%esi
+.align 4
+.long  2767451785
+       movl    28(%esp),%esp
+       popfl
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin
+.type  _x86_AES_set_encrypt_key,@function
+.align 16
+_x86_AES_set_encrypt_key:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    24(%esp),%esi
+       movl    32(%esp),%edi
+       testl   $-1,%esi
+       jz      .L040badpointer
+       testl   $-1,%edi
+       jz      .L040badpointer
+       call    .L041pic_point
+.L041pic_point:
+       popl    %ebp
+       leal    .LAES_Te-.L041pic_point(%ebp),%ebp
+       leal    2176(%ebp),%ebp
+       movl    -128(%ebp),%eax
+       movl    -96(%ebp),%ebx
+       movl    -64(%ebp),%ecx
+       movl    -32(%ebp),%edx
+       movl    (%ebp),%eax
+       movl    32(%ebp),%ebx
+       movl    64(%ebp),%ecx
+       movl    96(%ebp),%edx
+       movl    28(%esp),%ecx
+       cmpl    $128,%ecx
+       je      .L04210rounds
+       cmpl    $192,%ecx
+       je      .L04312rounds
+       cmpl    $256,%ecx
+       je      .L04414rounds
+       movl    $-2,%eax
+       jmp     .L045exit
+.L04210rounds:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       xorl    %ecx,%ecx
+       jmp     .L04610shortcut
+.align 4
+.L04710loop:
+       movl    (%edi),%eax
+       movl    12(%edi),%edx
+.L04610shortcut:
+       movzbl  %dl,%esi
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+       xorl    896(%ebp,%ecx,4),%eax
+       movl    %eax,16(%edi)
+       xorl    4(%edi),%eax
+       movl    %eax,20(%edi)
+       xorl    8(%edi),%eax
+       movl    %eax,24(%edi)
+       xorl    12(%edi),%eax
+       movl    %eax,28(%edi)
+       incl    %ecx
+       addl    $16,%edi
+       cmpl    $10,%ecx
+       jl      .L04710loop
+       movl    $10,80(%edi)
+       xorl    %eax,%eax
+       jmp     .L045exit
+.L04312rounds:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    16(%esi),%ecx
+       movl    20(%esi),%edx
+       movl    %ecx,16(%edi)
+       movl    %edx,20(%edi)
+       xorl    %ecx,%ecx
+       jmp     .L04812shortcut
+.align 4
+.L04912loop:
+       movl    (%edi),%eax
+       movl    20(%edi),%edx
+.L04812shortcut:
+       movzbl  %dl,%esi
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+       xorl    896(%ebp,%ecx,4),%eax
+       movl    %eax,24(%edi)
+       xorl    4(%edi),%eax
+       movl    %eax,28(%edi)
+       xorl    8(%edi),%eax
+       movl    %eax,32(%edi)
+       xorl    12(%edi),%eax
+       movl    %eax,36(%edi)
+       cmpl    $7,%ecx
+       je      .L05012break
+       incl    %ecx
+       xorl    16(%edi),%eax
+       movl    %eax,40(%edi)
+       xorl    20(%edi),%eax
+       movl    %eax,44(%edi)
+       addl    $24,%edi
+       jmp     .L04912loop
+.L05012break:
+       movl    $12,72(%edi)
+       xorl    %eax,%eax
+       jmp     .L045exit
+.L04414rounds:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    16(%esi),%eax
+       movl    20(%esi),%ebx
+       movl    24(%esi),%ecx
+       movl    28(%esi),%edx
+       movl    %eax,16(%edi)
+       movl    %ebx,20(%edi)
+       movl    %ecx,24(%edi)
+       movl    %edx,28(%edi)
+       xorl    %ecx,%ecx
+       jmp     .L05114shortcut
+.align 4
+.L05214loop:
+       movl    28(%edi),%edx
+.L05114shortcut:
+       movl    (%edi),%eax
+       movzbl  %dl,%esi
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+       xorl    896(%ebp,%ecx,4),%eax
+       movl    %eax,32(%edi)
+       xorl    4(%edi),%eax
+       movl    %eax,36(%edi)
+       xorl    8(%edi),%eax
+       movl    %eax,40(%edi)
+       xorl    12(%edi),%eax
+       movl    %eax,44(%edi)
+       cmpl    $6,%ecx
+       je      .L05314break
+       incl    %ecx
+       movl    %eax,%edx
+       movl    16(%edi),%eax
+       movzbl  %dl,%esi
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shrl    $16,%edx
+       shll    $8,%ebx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+       movzbl  -128(%ebp,%esi,1),%ebx
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+       movl    %eax,48(%edi)
+       xorl    20(%edi),%eax
+       movl    %eax,52(%edi)
+       xorl    24(%edi),%eax
+       movl    %eax,56(%edi)
+       xorl    28(%edi),%eax
+       movl    %eax,60(%edi)
+       addl    $32,%edi
+       jmp     .L05214loop
+.L05314break:
+       movl    $14,48(%edi)
+       xorl    %eax,%eax
+       jmp     .L045exit
+.L040badpointer:
+       movl    $-1,%eax
+.L045exit:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
+.globl AES_set_encrypt_key
+.type  AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key:
+.L_AES_set_encrypt_key_begin:
+       call    _x86_AES_set_encrypt_key
+       ret
+.size  AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
+.globl AES_set_decrypt_key
+.type  AES_set_decrypt_key,@function
+.align 16
+AES_set_decrypt_key:
+.L_AES_set_decrypt_key_begin:
+       call    _x86_AES_set_encrypt_key
+       cmpl    $0,%eax
+       je      .L054proceed
+       ret
+.L054proceed:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    28(%esp),%esi
+       movl    240(%esi),%ecx
+       leal    (,%ecx,4),%ecx
+       leal    (%esi,%ecx,4),%edi
+.align 4
+.L055invert:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    (%edi),%ecx
+       movl    4(%edi),%edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,(%esi)
+       movl    %edx,4(%esi)
+       movl    8(%esi),%eax
+       movl    12(%esi),%ebx
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       movl    %eax,8(%edi)
+       movl    %ebx,12(%edi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       addl    $16,%esi
+       subl    $16,%edi
+       cmpl    %edi,%esi
+       jne     .L055invert
+       movl    28(%esp),%edi
+       movl    240(%edi),%esi
+       leal    -2(%esi,%esi,1),%esi
+       leal    (%edi,%esi,8),%esi
+       movl    %esi,28(%esp)
+       movl    16(%edi),%eax
+.align 4
+.L056permute:
+       addl    $16,%edi
+       movl    %eax,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%eax,%eax,1),%ebx
+       subl    %ebp,%esi
+       andl    $4278124286,%ebx
+       andl    $454761243,%esi
+       xorl    %ebx,%esi
+       movl    %esi,%ebx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ebx,%ebx,1),%ecx
+       subl    %ebp,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       xorl    %eax,%ebx
+       xorl    %ecx,%esi
+       movl    %esi,%ecx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ecx,%ecx,1),%edx
+       xorl    %eax,%ecx
+       subl    %ebp,%esi
+       andl    $4278124286,%edx
+       andl    $454761243,%esi
+       roll    $8,%eax
+       xorl    %esi,%edx
+       movl    4(%edi),%ebp
+       xorl    %ebx,%eax
+       xorl    %edx,%ebx
+       xorl    %ecx,%eax
+       roll    $24,%ebx
+       xorl    %edx,%ecx
+       xorl    %edx,%eax
+       roll    $16,%ecx
+       xorl    %ebx,%eax
+       roll    $8,%edx
+       xorl    %ecx,%eax
+       movl    %ebp,%ebx
+       xorl    %edx,%eax
+       movl    %eax,(%edi)
+       movl    %ebx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ebx,%ebx,1),%ecx
+       subl    %ebp,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       xorl    %ecx,%esi
+       movl    %esi,%ecx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ecx,%ecx,1),%edx
+       subl    %ebp,%esi
+       andl    $4278124286,%edx
+       andl    $454761243,%esi
+       xorl    %ebx,%ecx
+       xorl    %edx,%esi
+       movl    %esi,%edx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%edx,%edx,1),%eax
+       xorl    %ebx,%edx
+       subl    %ebp,%esi
+       andl    $4278124286,%eax
+       andl    $454761243,%esi
+       roll    $8,%ebx
+       xorl    %esi,%eax
+       movl    8(%edi),%ebp
+       xorl    %ecx,%ebx
+       xorl    %eax,%ecx
+       xorl    %edx,%ebx
+       roll    $24,%ecx
+       xorl    %eax,%edx
+       xorl    %eax,%ebx
+       roll    $16,%edx
+       xorl    %ecx,%ebx
+       roll    $8,%eax
+       xorl    %edx,%ebx
+       movl    %ebp,%ecx
+       xorl    %eax,%ebx
+       movl    %ebx,4(%edi)
+       movl    %ecx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ecx,%ecx,1),%edx
+       subl    %ebp,%esi
+       andl    $4278124286,%edx
+       andl    $454761243,%esi
+       xorl    %edx,%esi
+       movl    %esi,%edx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%edx,%edx,1),%eax
+       subl    %ebp,%esi
+       andl    $4278124286,%eax
+       andl    $454761243,%esi
+       xorl    %ecx,%edx
+       xorl    %eax,%esi
+       movl    %esi,%eax
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%eax,%eax,1),%ebx
+       xorl    %ecx,%eax
+       subl    %ebp,%esi
+       andl    $4278124286,%ebx
+       andl    $454761243,%esi
+       roll    $8,%ecx
+       xorl    %esi,%ebx
+       movl    12(%edi),%ebp
+       xorl    %edx,%ecx
+       xorl    %ebx,%edx
+       xorl    %eax,%ecx
+       roll    $24,%edx
+       xorl    %ebx,%eax
+       xorl    %ebx,%ecx
+       roll    $16,%eax
+       xorl    %edx,%ecx
+       roll    $8,%ebx
+       xorl    %eax,%ecx
+       movl    %ebp,%edx
+       xorl    %ebx,%ecx
+       movl    %ecx,8(%edi)
+       movl    %edx,%esi
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%edx,%edx,1),%eax
+       subl    %ebp,%esi
+       andl    $4278124286,%eax
+       andl    $454761243,%esi
+       xorl    %eax,%esi
+       movl    %esi,%eax
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%eax,%eax,1),%ebx
+       subl    %ebp,%esi
+       andl    $4278124286,%ebx
+       andl    $454761243,%esi
+       xorl    %edx,%eax
+       xorl    %ebx,%esi
+       movl    %esi,%ebx
+       andl    $2155905152,%esi
+       movl    %esi,%ebp
+       shrl    $7,%ebp
+       leal    (%ebx,%ebx,1),%ecx
+       xorl    %edx,%ebx
+       subl    %ebp,%esi
+       andl    $4278124286,%ecx
+       andl    $454761243,%esi
+       roll    $8,%edx
+       xorl    %esi,%ecx
+       movl    16(%edi),%ebp
+       xorl    %eax,%edx
+       xorl    %ecx,%eax
+       xorl    %ebx,%edx
+       roll    $24,%eax
+       xorl    %ecx,%ebx
+       xorl    %ecx,%edx
+       roll    $16,%ebx
+       xorl    %eax,%edx
+       roll    $8,%ecx
+       xorl    %ebx,%edx
+       movl    %ebp,%eax
+       xorl    %ecx,%edx
+       movl    %edx,12(%edi)
+       cmpl    28(%esp),%edi
+       jb      .L056permute
+       xorl    %eax,%eax
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
+.byte  65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
+.byte  80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
+.byte  111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.comm  OPENSSL_ia32cap_P,4,4
diff --git a/secure/lib/libcrypto/asm/aes-x86_64.s b/secure/lib/libcrypto/asm/aes-x86_64.s
new file mode 100644 (file)
index 0000000..d000a57
--- /dev/null
@@ -0,0 +1,2532 @@
+.text  
+.type  _x86_64_AES_encrypt,@function
+.align 16
+_x86_64_AES_encrypt:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+
+       movl    240(%r15),%r13d
+       subl    $1,%r13d
+       jmp     .Lenc_loop
+.align 16
+.Lenc_loop:
+
+       movzbl  %al,%esi
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movl    0(%r14,%rsi,8),%r10d
+       movl    0(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r12d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movzbl  %dl,%ebp
+       xorl    3(%r14,%rsi,8),%r10d
+       xorl    3(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r8d
+
+       movzbl  %dh,%esi
+       shrl    $16,%ecx
+       movzbl  %ah,%ebp
+       xorl    3(%r14,%rsi,8),%r12d
+       shrl    $16,%edx
+       xorl    3(%r14,%rbp,8),%r8d
+
+       shrl    $16,%ebx
+       leaq    16(%r15),%r15
+       shrl    $16,%eax
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       xorl    2(%r14,%rsi,8),%r10d
+       xorl    2(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r12d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movzbl  %bl,%ebp
+       xorl    1(%r14,%rsi,8),%r10d
+       xorl    1(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r8d
+
+       movl    12(%r15),%edx
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movl    0(%r15),%eax
+       xorl    1(%r14,%rdi,8),%r12d
+       xorl    1(%r14,%rbp,8),%r8d
+
+       movl    4(%r15),%ebx
+       movl    8(%r15),%ecx
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       subl    $1,%r13d
+       jnz     .Lenc_loop
+       movzbl  %al,%esi
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movzbl  2(%r14,%rsi,8),%r10d
+       movzbl  2(%r14,%rdi,8),%r11d
+       movzbl  2(%r14,%rbp,8),%r12d
+
+       movzbl  %dl,%esi
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movzbl  2(%r14,%rsi,8),%r8d
+       movl    0(%r14,%rdi,8),%edi
+       movl    0(%r14,%rbp,8),%ebp
+
+       andl    $65280,%edi
+       andl    $65280,%ebp
+
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+       shrl    $16,%ecx
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       shrl    $16,%edx
+       movl    0(%r14,%rsi,8),%esi
+       movl    0(%r14,%rdi,8),%edi
+
+       andl    $65280,%esi
+       andl    $65280,%edi
+       shrl    $16,%ebx
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+       shrl    $16,%eax
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       movl    0(%r14,%rsi,8),%esi
+       movl    0(%r14,%rdi,8),%edi
+       movl    0(%r14,%rbp,8),%ebp
+
+       andl    $16711680,%esi
+       andl    $16711680,%edi
+       andl    $16711680,%ebp
+
+       xorl    %esi,%r10d
+       xorl    %edi,%r11d
+       xorl    %ebp,%r12d
+
+       movzbl  %bl,%esi
+       movzbl  %dh,%edi
+       movzbl  %ah,%ebp
+       movl    0(%r14,%rsi,8),%esi
+       movl    2(%r14,%rdi,8),%edi
+       movl    2(%r14,%rbp,8),%ebp
+
+       andl    $16711680,%esi
+       andl    $4278190080,%edi
+       andl    $4278190080,%ebp
+
+       xorl    %esi,%r8d
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movl    16+12(%r15),%edx
+       movl    2(%r14,%rsi,8),%esi
+       movl    2(%r14,%rdi,8),%edi
+       movl    16+0(%r15),%eax
+
+       andl    $4278190080,%esi
+       andl    $4278190080,%edi
+
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+
+       movl    16+4(%r15),%ebx
+       movl    16+8(%r15),%ecx
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+.byte  0xf3,0xc3                       
+.size  _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type  _x86_64_AES_encrypt_compact,@function
+.align 16
+_x86_64_AES_encrypt_compact:
+       leaq    128(%r14),%r8
+       movl    0-128(%r8),%edi
+       movl    32-128(%r8),%ebp
+       movl    64-128(%r8),%r10d
+       movl    96-128(%r8),%r11d
+       movl    128-128(%r8),%edi
+       movl    160-128(%r8),%ebp
+       movl    192-128(%r8),%r10d
+       movl    224-128(%r8),%r11d
+       jmp     .Lenc_loop_compact
+.align 16
+.Lenc_loop_compact:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+       leaq    16(%r15),%r15
+       movzbl  %al,%r10d
+       movzbl  %bl,%r11d
+       movzbl  %cl,%r12d
+       movzbl  (%r14,%r10,1),%r10d
+       movzbl  (%r14,%r11,1),%r11d
+       movzbl  (%r14,%r12,1),%r12d
+
+       movzbl  %dl,%r8d
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movzbl  (%r14,%r8,1),%r8d
+       movzbl  (%r14,%rsi,1),%r9d
+       movzbl  (%r14,%rdi,1),%r13d
+
+       movzbl  %dh,%ebp
+       movzbl  %ah,%esi
+       shrl    $16,%ecx
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       shrl    $16,%edx
+
+       movzbl  %cl,%edi
+       shll    $8,%r9d
+       shll    $8,%r13d
+       movzbl  (%r14,%rdi,1),%edi
+       xorl    %r9d,%r10d
+       xorl    %r13d,%r11d
+
+       movzbl  %dl,%r9d
+       shrl    $16,%eax
+       shrl    $16,%ebx
+       movzbl  %al,%r13d
+       shll    $8,%ebp
+       shll    $8,%esi
+       movzbl  (%r14,%r9,1),%r9d
+       movzbl  (%r14,%r13,1),%r13d
+       xorl    %ebp,%r12d
+       xorl    %esi,%r8d
+
+       movzbl  %bl,%ebp
+       movzbl  %dh,%esi
+       shll    $16,%edi
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       xorl    %edi,%r10d
+
+       movzbl  %ah,%edi
+       shrl    $8,%ecx
+       shrl    $8,%ebx
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rcx,1),%edx
+       movzbl  (%r14,%rbx,1),%ecx
+       shll    $16,%r9d
+       shll    $16,%r13d
+       shll    $16,%ebp
+       xorl    %r9d,%r11d
+       xorl    %r13d,%r12d
+       xorl    %ebp,%r8d
+
+       shll    $24,%esi
+       shll    $24,%edi
+       shll    $24,%edx
+       xorl    %esi,%r10d
+       shll    $24,%ecx
+       xorl    %edi,%r11d
+       movl    %r10d,%eax
+       movl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       cmpq    16(%rsp),%r15
+       je      .Lenc_compact_done
+       movl    %eax,%esi
+       movl    %ebx,%edi
+       andl    $2155905152,%esi
+       andl    $2155905152,%edi
+       movl    %esi,%r10d
+       movl    %edi,%r11d
+       shrl    $7,%r10d
+       leal    (%rax,%rax,1),%r8d
+       shrl    $7,%r11d
+       leal    (%rbx,%rbx,1),%r9d
+       subl    %r10d,%esi
+       subl    %r11d,%edi
+       andl    $4278124286,%r8d
+       andl    $4278124286,%r9d
+       andl    $454761243,%esi
+       andl    $454761243,%edi
+       movl    %eax,%r10d
+       movl    %ebx,%r11d
+       xorl    %esi,%r8d
+       xorl    %edi,%r9d
+
+       xorl    %r8d,%eax
+       xorl    %r9d,%ebx
+       movl    %ecx,%esi
+       movl    %edx,%edi
+       roll    $24,%eax
+       roll    $24,%ebx
+       andl    $2155905152,%esi
+       andl    $2155905152,%edi
+       xorl    %r8d,%eax
+       xorl    %r9d,%ebx
+       movl    %esi,%r12d
+       movl    %edi,%ebp
+       rorl    $16,%r10d
+       rorl    $16,%r11d
+       shrl    $7,%r12d
+       leal    (%rcx,%rcx,1),%r8d
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+       shrl    $7,%ebp
+       leal    (%rdx,%rdx,1),%r9d
+       rorl    $8,%r10d
+       rorl    $8,%r11d
+       subl    %r12d,%esi
+       subl    %ebp,%edi
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+
+       andl    $4278124286,%r8d
+       andl    $4278124286,%r9d
+       andl    $454761243,%esi
+       andl    $454761243,%edi
+       movl    %ecx,%r12d
+       movl    %edx,%ebp
+       xorl    %esi,%r8d
+       xorl    %edi,%r9d
+
+       xorl    %r8d,%ecx
+       xorl    %r9d,%edx
+       roll    $24,%ecx
+       roll    $24,%edx
+       xorl    %r8d,%ecx
+       xorl    %r9d,%edx
+       movl    0(%r14),%esi
+       rorl    $16,%r12d
+       rorl    $16,%ebp
+       movl    64(%r14),%edi
+       xorl    %r12d,%ecx
+       xorl    %ebp,%edx
+       movl    128(%r14),%r8d
+       rorl    $8,%r12d
+       rorl    $8,%ebp
+       movl    192(%r14),%r9d
+       xorl    %r12d,%ecx
+       xorl    %ebp,%edx
+       jmp     .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+.byte  0xf3,0xc3                       
+.size  _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.globl AES_encrypt
+.type  AES_encrypt,@function
+.align 16
+AES_encrypt:
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+
+
+       movq    %rsp,%r10
+       leaq    -63(%rdx),%rcx
+       andq    $-64,%rsp
+       subq    %rsp,%rcx
+       negq    %rcx
+       andq    $960,%rcx
+       subq    %rcx,%rsp
+       subq    $32,%rsp
+
+       movq    %rsi,16(%rsp)
+       movq    %r10,24(%rsp)
+.Lenc_prologue:
+
+       movq    %rdx,%r15
+       movl    240(%r15),%r13d
+
+       movl    0(%rdi),%eax
+       movl    4(%rdi),%ebx
+       movl    8(%rdi),%ecx
+       movl    12(%rdi),%edx
+
+       shll    $4,%r13d
+       leaq    (%r15,%r13,1),%rbp
+       movq    %r15,(%rsp)
+       movq    %rbp,8(%rsp)
+
+
+       leaq    .LAES_Te+2048(%rip),%r14
+       leaq    768(%rsp),%rbp
+       subq    %r14,%rbp
+       andq    $768,%rbp
+       leaq    (%r14,%rbp,1),%r14
+
+       call    _x86_64_AES_encrypt_compact
+
+       movq    16(%rsp),%r9
+       movq    24(%rsp),%rsi
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       movq    (%rsi),%r15
+       movq    8(%rsi),%r14
+       movq    16(%rsi),%r13
+       movq    24(%rsi),%r12
+       movq    32(%rsi),%rbp
+       movq    40(%rsi),%rbx
+       leaq    48(%rsi),%rsp
+.Lenc_epilogue:
+       .byte   0xf3,0xc3
+.size  AES_encrypt,.-AES_encrypt
+.type  _x86_64_AES_decrypt,@function
+.align 16
+_x86_64_AES_decrypt:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+
+       movl    240(%r15),%r13d
+       subl    $1,%r13d
+       jmp     .Ldec_loop
+.align 16
+.Ldec_loop:
+
+       movzbl  %al,%esi
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movl    0(%r14,%rsi,8),%r10d
+       movl    0(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r12d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movzbl  %dl,%ebp
+       xorl    3(%r14,%rsi,8),%r10d
+       xorl    3(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r8d
+
+       movzbl  %bh,%esi
+       shrl    $16,%eax
+       movzbl  %ch,%ebp
+       xorl    3(%r14,%rsi,8),%r12d
+       shrl    $16,%edx
+       xorl    3(%r14,%rbp,8),%r8d
+
+       shrl    $16,%ebx
+       leaq    16(%r15),%r15
+       shrl    $16,%ecx
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       xorl    2(%r14,%rsi,8),%r10d
+       xorl    2(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r12d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movzbl  %bl,%ebp
+       xorl    1(%r14,%rsi,8),%r10d
+       xorl    1(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r8d
+
+       movzbl  %dh,%esi
+       movl    12(%r15),%edx
+       movzbl  %ah,%ebp
+       xorl    1(%r14,%rsi,8),%r12d
+       movl    0(%r15),%eax
+       xorl    1(%r14,%rbp,8),%r8d
+
+       xorl    %r10d,%eax
+       movl    4(%r15),%ebx
+       movl    8(%r15),%ecx
+       xorl    %r12d,%ecx
+       xorl    %r11d,%ebx
+       xorl    %r8d,%edx
+       subl    $1,%r13d
+       jnz     .Ldec_loop
+       leaq    2048(%r14),%r14
+       movzbl  %al,%esi
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movzbl  (%r14,%rsi,1),%r10d
+       movzbl  (%r14,%rdi,1),%r11d
+       movzbl  (%r14,%rbp,1),%r12d
+
+       movzbl  %dl,%esi
+       movzbl  %dh,%edi
+       movzbl  %ah,%ebp
+       movzbl  (%r14,%rsi,1),%r8d
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $8,%edi
+       shll    $8,%ebp
+
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+       shrl    $16,%edx
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       shrl    $16,%eax
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+
+       shll    $8,%esi
+       shll    $8,%edi
+       shrl    $16,%ebx
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+       shrl    $16,%ecx
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $16,%esi
+       shll    $16,%edi
+       shll    $16,%ebp
+
+       xorl    %esi,%r10d
+       xorl    %edi,%r11d
+       xorl    %ebp,%r12d
+
+       movzbl  %bl,%esi
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $16,%esi
+       shll    $24,%edi
+       shll    $24,%ebp
+
+       xorl    %esi,%r8d
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movl    16+12(%r15),%edx
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movl    16+0(%r15),%eax
+
+       shll    $24,%esi
+       shll    $24,%edi
+
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+
+       movl    16+4(%r15),%ebx
+       movl    16+8(%r15),%ecx
+       leaq    -2048(%r14),%r14
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+.byte  0xf3,0xc3                       
+.size  _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type  _x86_64_AES_decrypt_compact,@function
+.align 16
+_x86_64_AES_decrypt_compact:
+       leaq    128(%r14),%r8
+       movl    0-128(%r8),%edi
+       movl    32-128(%r8),%ebp
+       movl    64-128(%r8),%r10d
+       movl    96-128(%r8),%r11d
+       movl    128-128(%r8),%edi
+       movl    160-128(%r8),%ebp
+       movl    192-128(%r8),%r10d
+       movl    224-128(%r8),%r11d
+       jmp     .Ldec_loop_compact
+
+.align 16
+.Ldec_loop_compact:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+       leaq    16(%r15),%r15
+       movzbl  %al,%r10d
+       movzbl  %bl,%r11d
+       movzbl  %cl,%r12d
+       movzbl  (%r14,%r10,1),%r10d
+       movzbl  (%r14,%r11,1),%r11d
+       movzbl  (%r14,%r12,1),%r12d
+
+       movzbl  %dl,%r8d
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movzbl  (%r14,%r8,1),%r8d
+       movzbl  (%r14,%rsi,1),%r9d
+       movzbl  (%r14,%rdi,1),%r13d
+
+       movzbl  %bh,%ebp
+       movzbl  %ch,%esi
+       shrl    $16,%ecx
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       shrl    $16,%edx
+
+       movzbl  %cl,%edi
+       shll    $8,%r9d
+       shll    $8,%r13d
+       movzbl  (%r14,%rdi,1),%edi
+       xorl    %r9d,%r10d
+       xorl    %r13d,%r11d
+
+       movzbl  %dl,%r9d
+       shrl    $16,%eax
+       shrl    $16,%ebx
+       movzbl  %al,%r13d
+       shll    $8,%ebp
+       shll    $8,%esi
+       movzbl  (%r14,%r9,1),%r9d
+       movzbl  (%r14,%r13,1),%r13d
+       xorl    %ebp,%r12d
+       xorl    %esi,%r8d
+
+       movzbl  %bl,%ebp
+       movzbl  %bh,%esi
+       shll    $16,%edi
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       xorl    %edi,%r10d
+
+       movzbl  %ch,%edi
+       shll    $16,%r9d
+       shll    $16,%r13d
+       movzbl  (%r14,%rdi,1),%ebx
+       xorl    %r9d,%r11d
+       xorl    %r13d,%r12d
+
+       movzbl  %dh,%edi
+       shrl    $8,%eax
+       shll    $16,%ebp
+       movzbl  (%r14,%rdi,1),%ecx
+       movzbl  (%r14,%rax,1),%edx
+       xorl    %ebp,%r8d
+
+       shll    $24,%esi
+       shll    $24,%ebx
+       shll    $24,%ecx
+       xorl    %esi,%r10d
+       shll    $24,%edx
+       xorl    %r11d,%ebx
+       movl    %r10d,%eax
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       cmpq    16(%rsp),%r15
+       je      .Ldec_compact_done
+
+       movq    256+0(%r14),%rsi
+       shlq    $32,%rbx
+       shlq    $32,%rdx
+       movq    256+8(%r14),%rdi
+       orq     %rbx,%rax
+       orq     %rdx,%rcx
+       movq    256+16(%r14),%rbp
+       movq    %rax,%rbx
+       movq    %rcx,%rdx
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r9
+       movq    %rdx,%r12
+       shrq    $7,%r9
+       leaq    (%rax,%rax,1),%r8
+       shrq    $7,%r12
+       leaq    (%rcx,%rcx,1),%r11
+       subq    %r9,%rbx
+       subq    %r12,%rdx
+       andq    %rdi,%r8
+       andq    %rdi,%r11
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %r8,%rbx
+       xorq    %r11,%rdx
+       movq    %rbx,%r8
+       movq    %rdx,%r11
+
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r10
+       movq    %rdx,%r13
+       shrq    $7,%r10
+       leaq    (%r8,%r8,1),%r9
+       shrq    $7,%r13
+       leaq    (%r11,%r11,1),%r12
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       andq    %rdi,%r9
+       andq    %rdi,%r12
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %r9,%rbx
+       xorq    %r12,%rdx
+       movq    %rbx,%r9
+       movq    %rdx,%r12
+
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r10
+       movq    %rdx,%r13
+       shrq    $7,%r10
+       xorq    %rax,%r8
+       shrq    $7,%r13
+       xorq    %rcx,%r11
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       leaq    (%r9,%r9,1),%r10
+       leaq    (%r12,%r12,1),%r13
+       xorq    %rax,%r9
+       xorq    %rcx,%r12
+       andq    %rdi,%r10
+       andq    %rdi,%r13
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %rbx,%r10
+       xorq    %rdx,%r13
+
+       xorq    %r10,%rax
+       xorq    %r13,%rcx
+       xorq    %r10,%r8
+       xorq    %r13,%r11
+       movq    %rax,%rbx
+       movq    %rcx,%rdx
+       xorq    %r10,%r9
+       xorq    %r13,%r12
+       shrq    $32,%rbx
+       shrq    $32,%rdx
+       xorq    %r8,%r10
+       xorq    %r11,%r13
+       roll    $8,%eax
+       roll    $8,%ecx
+       xorq    %r9,%r10
+       xorq    %r12,%r13
+
+       roll    $8,%ebx
+       roll    $8,%edx
+       xorl    %r10d,%eax
+       xorl    %r13d,%ecx
+       shrq    $32,%r10
+       shrq    $32,%r13
+       xorl    %r10d,%ebx
+       xorl    %r13d,%edx
+
+       movq    %r8,%r10
+       movq    %r11,%r13
+       shrq    $32,%r10
+       shrq    $32,%r13
+       roll    $24,%r8d
+       roll    $24,%r11d
+       roll    $24,%r10d
+       roll    $24,%r13d
+       xorl    %r8d,%eax
+       xorl    %r11d,%ecx
+       movq    %r9,%r8
+       movq    %r12,%r11
+       xorl    %r10d,%ebx
+       xorl    %r13d,%edx
+
+       movq    0(%r14),%rsi
+       shrq    $32,%r8
+       shrq    $32,%r11
+       movq    64(%r14),%rdi
+       roll    $16,%r9d
+       roll    $16,%r12d
+       movq    128(%r14),%rbp
+       roll    $16,%r8d
+       roll    $16,%r11d
+       movq    192(%r14),%r10
+       xorl    %r9d,%eax
+       xorl    %r12d,%ecx
+       movq    256(%r14),%r13
+       xorl    %r8d,%ebx
+       xorl    %r11d,%edx
+       jmp     .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+       xorl    0(%r15),%eax
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+.byte  0xf3,0xc3                       
+.size  _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.globl AES_decrypt
+.type  AES_decrypt,@function
+.align 16
+AES_decrypt:
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+
+
+       movq    %rsp,%r10
+       leaq    -63(%rdx),%rcx
+       andq    $-64,%rsp
+       subq    %rsp,%rcx
+       negq    %rcx
+       andq    $960,%rcx
+       subq    %rcx,%rsp
+       subq    $32,%rsp
+
+       movq    %rsi,16(%rsp)
+       movq    %r10,24(%rsp)
+.Ldec_prologue:
+
+       movq    %rdx,%r15
+       movl    240(%r15),%r13d
+
+       movl    0(%rdi),%eax
+       movl    4(%rdi),%ebx
+       movl    8(%rdi),%ecx
+       movl    12(%rdi),%edx
+
+       shll    $4,%r13d
+       leaq    (%r15,%r13,1),%rbp
+       movq    %r15,(%rsp)
+       movq    %rbp,8(%rsp)
+
+
+       leaq    .LAES_Td+2048(%rip),%r14
+       leaq    768(%rsp),%rbp
+       subq    %r14,%rbp
+       andq    $768,%rbp
+       leaq    (%r14,%rbp,1),%r14
+       shrq    $3,%rbp
+       addq    %rbp,%r14
+
+       call    _x86_64_AES_decrypt_compact
+
+       movq    16(%rsp),%r9
+       movq    24(%rsp),%rsi
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       movq    (%rsi),%r15
+       movq    8(%rsi),%r14
+       movq    16(%rsi),%r13
+       movq    24(%rsi),%r12
+       movq    32(%rsi),%rbp
+       movq    40(%rsi),%rbx
+       leaq    48(%rsi),%rsp
+.Ldec_epilogue:
+       .byte   0xf3,0xc3
+.size  AES_decrypt,.-AES_decrypt
+.globl AES_set_encrypt_key
+.type  AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key:
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       subq    $8,%rsp
+.Lenc_key_prologue:
+
+       call    _x86_64_AES_set_encrypt_key
+
+       movq    8(%rsp),%r15
+       movq    16(%rsp),%r14
+       movq    24(%rsp),%r13
+       movq    32(%rsp),%r12
+       movq    40(%rsp),%rbp
+       movq    48(%rsp),%rbx
+       addq    $56,%rsp
+.Lenc_key_epilogue:
+       .byte   0xf3,0xc3
+.size  AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type  _x86_64_AES_set_encrypt_key,@function
+.align 16
+_x86_64_AES_set_encrypt_key:
+       movl    %esi,%ecx
+       movq    %rdi,%rsi
+       movq    %rdx,%rdi
+
+       testq   $-1,%rsi
+       jz      .Lbadpointer
+       testq   $-1,%rdi
+       jz      .Lbadpointer
+
+       leaq    .LAES_Te(%rip),%rbp
+       leaq    2048+128(%rbp),%rbp
+
+
+       movl    0-128(%rbp),%eax
+       movl    32-128(%rbp),%ebx
+       movl    64-128(%rbp),%r8d
+       movl    96-128(%rbp),%edx
+       movl    128-128(%rbp),%eax
+       movl    160-128(%rbp),%ebx
+       movl    192-128(%rbp),%r8d
+       movl    224-128(%rbp),%edx
+
+       cmpl    $128,%ecx
+       je      .L10rounds
+       cmpl    $192,%ecx
+       je      .L12rounds
+       cmpl    $256,%ecx
+       je      .L14rounds
+       movq    $-2,%rax
+       jmp     .Lexit
+
+.L10rounds:
+       movq    0(%rsi),%rax
+       movq    8(%rsi),%rdx
+       movq    %rax,0(%rdi)
+       movq    %rdx,8(%rdi)
+
+       shrq    $32,%rdx
+       xorl    %ecx,%ecx
+       jmp     .L10shortcut
+.align 4
+.L10loop:
+       movl    0(%rdi),%eax
+       movl    12(%rdi),%edx
+.L10shortcut:
+       movzbl  %dl,%esi
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+
+       xorl    1024-128(%rbp,%rcx,4),%eax
+       movl    %eax,16(%rdi)
+       xorl    4(%rdi),%eax
+       movl    %eax,20(%rdi)
+       xorl    8(%rdi),%eax
+       movl    %eax,24(%rdi)
+       xorl    12(%rdi),%eax
+       movl    %eax,28(%rdi)
+       addl    $1,%ecx
+       leaq    16(%rdi),%rdi
+       cmpl    $10,%ecx
+       jl      .L10loop
+
+       movl    $10,80(%rdi)
+       xorq    %rax,%rax
+       jmp     .Lexit
+
+.L12rounds:
+       movq    0(%rsi),%rax
+       movq    8(%rsi),%rbx
+       movq    16(%rsi),%rdx
+       movq    %rax,0(%rdi)
+       movq    %rbx,8(%rdi)
+       movq    %rdx,16(%rdi)
+
+       shrq    $32,%rdx
+       xorl    %ecx,%ecx
+       jmp     .L12shortcut
+.align 4
+.L12loop:
+       movl    0(%rdi),%eax
+       movl    20(%rdi),%edx
+.L12shortcut:
+       movzbl  %dl,%esi
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+
+       xorl    1024-128(%rbp,%rcx,4),%eax
+       movl    %eax,24(%rdi)
+       xorl    4(%rdi),%eax
+       movl    %eax,28(%rdi)
+       xorl    8(%rdi),%eax
+       movl    %eax,32(%rdi)
+       xorl    12(%rdi),%eax
+       movl    %eax,36(%rdi)
+
+       cmpl    $7,%ecx
+       je      .L12break
+       addl    $1,%ecx
+
+       xorl    16(%rdi),%eax
+       movl    %eax,40(%rdi)
+       xorl    20(%rdi),%eax
+       movl    %eax,44(%rdi)
+
+       leaq    24(%rdi),%rdi
+       jmp     .L12loop
+.L12break:
+       movl    $12,72(%rdi)
+       xorq    %rax,%rax
+       jmp     .Lexit
+
+.L14rounds:
+       movq    0(%rsi),%rax
+       movq    8(%rsi),%rbx
+       movq    16(%rsi),%rcx
+       movq    24(%rsi),%rdx
+       movq    %rax,0(%rdi)
+       movq    %rbx,8(%rdi)
+       movq    %rcx,16(%rdi)
+       movq    %rdx,24(%rdi)
+
+       shrq    $32,%rdx
+       xorl    %ecx,%ecx
+       jmp     .L14shortcut
+.align 4
+.L14loop:
+       movl    0(%rdi),%eax
+       movl    28(%rdi),%edx
+.L14shortcut:
+       movzbl  %dl,%esi
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shrl    $16,%edx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $8,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+
+       xorl    1024-128(%rbp,%rcx,4),%eax
+       movl    %eax,32(%rdi)
+       xorl    4(%rdi),%eax
+       movl    %eax,36(%rdi)
+       xorl    8(%rdi),%eax
+       movl    %eax,40(%rdi)
+       xorl    12(%rdi),%eax
+       movl    %eax,44(%rdi)
+
+       cmpl    $6,%ecx
+       je      .L14break
+       addl    $1,%ecx
+
+       movl    %eax,%edx
+       movl    16(%rdi),%eax
+       movzbl  %dl,%esi
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shrl    $16,%edx
+       shll    $8,%ebx
+       movzbl  %dl,%esi
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       movzbl  %dh,%esi
+       shll    $16,%ebx
+       xorl    %ebx,%eax
+
+       movzbl  -128(%rbp,%rsi,1),%ebx
+       shll    $24,%ebx
+       xorl    %ebx,%eax
+
+       movl    %eax,48(%rdi)
+       xorl    20(%rdi),%eax
+       movl    %eax,52(%rdi)
+       xorl    24(%rdi),%eax
+       movl    %eax,56(%rdi)
+       xorl    28(%rdi),%eax
+       movl    %eax,60(%rdi)
+
+       leaq    32(%rdi),%rdi
+       jmp     .L14loop
+.L14break:
+       movl    $14,48(%rdi)
+       xorq    %rax,%rax
+       jmp     .Lexit
+
+.Lbadpointer:
+       movq    $-1,%rax
+.Lexit:
+.byte  0xf3,0xc3                       
+.size  _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
+.globl AES_set_decrypt_key
+.type  AES_set_decrypt_key,@function
+.align 16
+AES_set_decrypt_key:
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       pushq   %rdx
+.Ldec_key_prologue:
+
+       call    _x86_64_AES_set_encrypt_key
+       movq    (%rsp),%r8
+       cmpl    $0,%eax
+       jne     .Labort
+
+       movl    240(%r8),%r14d
+       xorq    %rdi,%rdi
+       leaq    (%rdi,%r14,4),%rcx
+       movq    %r8,%rsi
+       leaq    (%r8,%rcx,4),%rdi
+.align 4
+.Linvert:
+       movq    0(%rsi),%rax
+       movq    8(%rsi),%rbx
+       movq    0(%rdi),%rcx
+       movq    8(%rdi),%rdx
+       movq    %rax,0(%rdi)
+       movq    %rbx,8(%rdi)
+       movq    %rcx,0(%rsi)
+       movq    %rdx,8(%rsi)
+       leaq    16(%rsi),%rsi
+       leaq    -16(%rdi),%rdi
+       cmpq    %rsi,%rdi
+       jne     .Linvert
+
+       leaq    .LAES_Te+2048+1024(%rip),%rax
+
+       movq    40(%rax),%rsi
+       movq    48(%rax),%rdi
+       movq    56(%rax),%rbp
+
+       movq    %r8,%r15
+       subl    $1,%r14d
+.align 4
+.Lpermute:
+       leaq    16(%r15),%r15
+       movq    0(%r15),%rax
+       movq    8(%r15),%rcx
+       movq    %rax,%rbx
+       movq    %rcx,%rdx
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r9
+       movq    %rdx,%r12
+       shrq    $7,%r9
+       leaq    (%rax,%rax,1),%r8
+       shrq    $7,%r12
+       leaq    (%rcx,%rcx,1),%r11
+       subq    %r9,%rbx
+       subq    %r12,%rdx
+       andq    %rdi,%r8
+       andq    %rdi,%r11
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %r8,%rbx
+       xorq    %r11,%rdx
+       movq    %rbx,%r8
+       movq    %rdx,%r11
+
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r10
+       movq    %rdx,%r13
+       shrq    $7,%r10
+       leaq    (%r8,%r8,1),%r9
+       shrq    $7,%r13
+       leaq    (%r11,%r11,1),%r12
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       andq    %rdi,%r9
+       andq    %rdi,%r12
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %r9,%rbx
+       xorq    %r12,%rdx
+       movq    %rbx,%r9
+       movq    %rdx,%r12
+
+       andq    %rsi,%rbx
+       andq    %rsi,%rdx
+       movq    %rbx,%r10
+       movq    %rdx,%r13
+       shrq    $7,%r10
+       xorq    %rax,%r8
+       shrq    $7,%r13
+       xorq    %rcx,%r11
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       leaq    (%r9,%r9,1),%r10
+       leaq    (%r12,%r12,1),%r13
+       xorq    %rax,%r9
+       xorq    %rcx,%r12
+       andq    %rdi,%r10
+       andq    %rdi,%r13
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %rbx,%r10
+       xorq    %rdx,%r13
+
+       xorq    %r10,%rax
+       xorq    %r13,%rcx
+       xorq    %r10,%r8
+       xorq    %r13,%r11
+       movq    %rax,%rbx
+       movq    %rcx,%rdx
+       xorq    %r10,%r9
+       xorq    %r13,%r12
+       shrq    $32,%rbx
+       shrq    $32,%rdx
+       xorq    %r8,%r10
+       xorq    %r11,%r13
+       roll    $8,%eax
+       roll    $8,%ecx
+       xorq    %r9,%r10
+       xorq    %r12,%r13
+
+       roll    $8,%ebx
+       roll    $8,%edx
+       xorl    %r10d,%eax
+       xorl    %r13d,%ecx
+       shrq    $32,%r10
+       shrq    $32,%r13
+       xorl    %r10d,%ebx
+       xorl    %r13d,%edx
+
+       movq    %r8,%r10
+       movq    %r11,%r13
+       shrq    $32,%r10
+       shrq    $32,%r13
+       roll    $24,%r8d
+       roll    $24,%r11d
+       roll    $24,%r10d
+       roll    $24,%r13d
+       xorl    %r8d,%eax
+       xorl    %r11d,%ecx
+       movq    %r9,%r8
+       movq    %r12,%r11
+       xorl    %r10d,%ebx
+       xorl    %r13d,%edx
+
+
+       shrq    $32,%r8
+       shrq    $32,%r11
+
+       roll    $16,%r9d
+       roll    $16,%r12d
+
+       roll    $16,%r8d
+       roll    $16,%r11d
+
+       xorl    %r9d,%eax
+       xorl    %r12d,%ecx
+
+       xorl    %r8d,%ebx
+       xorl    %r11d,%edx
+       movl    %eax,0(%r15)
+       movl    %ebx,4(%r15)
+       movl    %ecx,8(%r15)
+       movl    %edx,12(%r15)
+       subl    $1,%r14d
+       jnz     .Lpermute
+
+       xorq    %rax,%rax
+.Labort:
+       movq    8(%rsp),%r15
+       movq    16(%rsp),%r14
+       movq    24(%rsp),%r13
+       movq    32(%rsp),%r12
+       movq    40(%rsp),%rbp
+       movq    48(%rsp),%rbx
+       addq    $56,%rsp
+.Ldec_key_epilogue:
+       .byte   0xf3,0xc3
+.size  AES_set_decrypt_key,.-AES_set_decrypt_key
+.globl AES_cbc_encrypt
+.type  AES_cbc_encrypt,@function
+.align 16
+
+AES_cbc_encrypt:
+       cmpq    $0,%rdx
+       je      .Lcbc_epilogue
+       pushfq
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+.Lcbc_prologue:
+
+       cld
+       movl    %r9d,%r9d
+
+       leaq    .LAES_Te(%rip),%r14
+       cmpq    $0,%r9
+       jne     .Lcbc_picked_te
+       leaq    .LAES_Td(%rip),%r14
+.Lcbc_picked_te:
+
+       movl    OPENSSL_ia32cap_P(%rip),%r10d
+       cmpq    $512,%rdx
+       jb      .Lcbc_slow_prologue
+       testq   $15,%rdx
+       jnz     .Lcbc_slow_prologue
+       btl     $28,%r10d
+       jc      .Lcbc_slow_prologue
+
+
+       leaq    -88-248(%rsp),%r15
+       andq    $-64,%r15
+
+
+       movq    %r14,%r10
+       leaq    2304(%r14),%r11
+       movq    %r15,%r12
+       andq    $4095,%r10
+       andq    $4095,%r11
+       andq    $4095,%r12
+
+       cmpq    %r11,%r12
+       jb      .Lcbc_te_break_out
+       subq    %r11,%r12
+       subq    %r12,%r15
+       jmp     .Lcbc_te_ok
+.Lcbc_te_break_out:
+       subq    %r10,%r12
+       andq    $4095,%r12
+       addq    $320,%r12
+       subq    %r12,%r15
+.align 4
+.Lcbc_te_ok:
+
+       xchgq   %rsp,%r15
+
+       movq    %r15,16(%rsp)
+.Lcbc_fast_body:
+       movq    %rdi,24(%rsp)
+       movq    %rsi,32(%rsp)
+       movq    %rdx,40(%rsp)
+       movq    %rcx,48(%rsp)
+       movq    %r8,56(%rsp)
+       movl    $0,80+240(%rsp)
+       movq    %r8,%rbp
+       movq    %r9,%rbx
+       movq    %rsi,%r9
+       movq    %rdi,%r8
+       movq    %rcx,%r15
+
+       movl    240(%r15),%eax
+
+       movq    %r15,%r10
+       subq    %r14,%r10
+       andq    $4095,%r10
+       cmpq    $2304,%r10
+       jb      .Lcbc_do_ecopy
+       cmpq    $4096-248,%r10
+       jb      .Lcbc_skip_ecopy
+.align 4
+.Lcbc_do_ecopy:
+       movq    %r15,%rsi
+       leaq    80(%rsp),%rdi
+       leaq    80(%rsp),%r15
+       movl    $30,%ecx
+.long  0x90A548F3      
+       movl    %eax,(%rdi)
+.Lcbc_skip_ecopy:
+       movq    %r15,0(%rsp)
+
+       movl    $18,%ecx
+.align 4
+.Lcbc_prefetch_te:
+       movq    0(%r14),%r10
+       movq    32(%r14),%r11
+       movq    64(%r14),%r12
+       movq    96(%r14),%r13
+       leaq    128(%r14),%r14
+       subl    $1,%ecx
+       jnz     .Lcbc_prefetch_te
+       leaq    -2304(%r14),%r14
+
+       cmpq    $0,%rbx
+       je      .LFAST_DECRYPT
+
+
+       movl    0(%rbp),%eax
+       movl    4(%rbp),%ebx
+       movl    8(%rbp),%ecx
+       movl    12(%rbp),%edx
+
+.align 4
+.Lcbc_fast_enc_loop:
+       xorl    0(%r8),%eax
+       xorl    4(%r8),%ebx
+       xorl    8(%r8),%ecx
+       xorl    12(%r8),%edx
+       movq    0(%rsp),%r15
+       movq    %r8,24(%rsp)
+
+       call    _x86_64_AES_encrypt
+
+       movq    24(%rsp),%r8
+       movq    40(%rsp),%r10
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       leaq    16(%r8),%r8
+       leaq    16(%r9),%r9
+       subq    $16,%r10
+       testq   $-16,%r10
+       movq    %r10,40(%rsp)
+       jnz     .Lcbc_fast_enc_loop
+       movq    56(%rsp),%rbp
+       movl    %eax,0(%rbp)
+       movl    %ebx,4(%rbp)
+       movl    %ecx,8(%rbp)
+       movl    %edx,12(%rbp)
+
+       jmp     .Lcbc_fast_cleanup
+
+
+.align 16
+.LFAST_DECRYPT:
+       cmpq    %r8,%r9
+       je      .Lcbc_fast_dec_in_place
+
+       movq    %rbp,64(%rsp)
+.align 4
+.Lcbc_fast_dec_loop:
+       movl    0(%r8),%eax
+       movl    4(%r8),%ebx
+       movl    8(%r8),%ecx
+       movl    12(%r8),%edx
+       movq    0(%rsp),%r15
+       movq    %r8,24(%rsp)
+
+       call    _x86_64_AES_decrypt
+
+       movq    64(%rsp),%rbp
+       movq    24(%rsp),%r8
+       movq    40(%rsp),%r10
+       xorl    0(%rbp),%eax
+       xorl    4(%rbp),%ebx
+       xorl    8(%rbp),%ecx
+       xorl    12(%rbp),%edx
+       movq    %r8,%rbp
+
+       subq    $16,%r10
+       movq    %r10,40(%rsp)
+       movq    %rbp,64(%rsp)
+
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       leaq    16(%r8),%r8
+       leaq    16(%r9),%r9
+       jnz     .Lcbc_fast_dec_loop
+       movq    56(%rsp),%r12
+       movq    0(%rbp),%r10
+       movq    8(%rbp),%r11
+       movq    %r10,0(%r12)
+       movq    %r11,8(%r12)
+       jmp     .Lcbc_fast_cleanup
+
+.align 16
+.Lcbc_fast_dec_in_place:
+       movq    0(%rbp),%r10
+       movq    8(%rbp),%r11
+       movq    %r10,0+64(%rsp)
+       movq    %r11,8+64(%rsp)
+.align 4
+.Lcbc_fast_dec_in_place_loop:
+       movl    0(%r8),%eax
+       movl    4(%r8),%ebx
+       movl    8(%r8),%ecx
+       movl    12(%r8),%edx
+       movq    0(%rsp),%r15
+       movq    %r8,24(%rsp)
+
+       call    _x86_64_AES_decrypt
+
+       movq    24(%rsp),%r8
+       movq    40(%rsp),%r10
+       xorl    0+64(%rsp),%eax
+       xorl    4+64(%rsp),%ebx
+       xorl    8+64(%rsp),%ecx
+       xorl    12+64(%rsp),%edx
+
+       movq    0(%r8),%r11
+       movq    8(%r8),%r12
+       subq    $16,%r10
+       jz      .Lcbc_fast_dec_in_place_done
+
+       movq    %r11,0+64(%rsp)
+       movq    %r12,8+64(%rsp)
+
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       leaq    16(%r8),%r8
+       leaq    16(%r9),%r9
+       movq    %r10,40(%rsp)
+       jmp     .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+       movq    56(%rsp),%rdi
+       movq    %r11,0(%rdi)
+       movq    %r12,8(%rdi)
+
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+.align 4
+.Lcbc_fast_cleanup:
+       cmpl    $0,80+240(%rsp)
+       leaq    80(%rsp),%rdi
+       je      .Lcbc_exit
+       movl    $30,%ecx
+       xorq    %rax,%rax
+.long  0x90AB48F3      
+
+       jmp     .Lcbc_exit
+
+
+.align 16
+.Lcbc_slow_prologue:
+
+       leaq    -88(%rsp),%rbp
+       andq    $-64,%rbp
+
+       leaq    -88-63(%rcx),%r10
+       subq    %rbp,%r10
+       negq    %r10
+       andq    $960,%r10
+       subq    %r10,%rbp
+
+       xchgq   %rsp,%rbp
+
+       movq    %rbp,16(%rsp)
+.Lcbc_slow_body:
+
+
+
+
+       movq    %r8,56(%rsp)
+       movq    %r8,%rbp
+       movq    %r9,%rbx
+       movq    %rsi,%r9
+       movq    %rdi,%r8
+       movq    %rcx,%r15
+       movq    %rdx,%r10
+
+       movl    240(%r15),%eax
+       movq    %r15,0(%rsp)
+       shll    $4,%eax
+       leaq    (%r15,%rax,1),%rax
+       movq    %rax,8(%rsp)
+
+
+       leaq    2048(%r14),%r14
+       leaq    768-8(%rsp),%rax
+       subq    %r14,%rax
+       andq    $768,%rax
+       leaq    (%r14,%rax,1),%r14
+
+       cmpq    $0,%rbx
+       je      .LSLOW_DECRYPT
+
+
+       testq   $-16,%r10
+       movl    0(%rbp),%eax
+       movl    4(%rbp),%ebx
+       movl    8(%rbp),%ecx
+       movl    12(%rbp),%edx
+       jz      .Lcbc_slow_enc_tail     
+
+.align 4
+.Lcbc_slow_enc_loop:
+       xorl    0(%r8),%eax
+       xorl    4(%r8),%ebx
+       xorl    8(%r8),%ecx
+       xorl    12(%r8),%edx
+       movq    0(%rsp),%r15
+       movq    %r8,24(%rsp)
+       movq    %r9,32(%rsp)
+       movq    %r10,40(%rsp)
+
+       call    _x86_64_AES_encrypt_compact
+
+       movq    24(%rsp),%r8
+       movq    32(%rsp),%r9
+       movq    40(%rsp),%r10
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       leaq    16(%r8),%r8
+       leaq    16(%r9),%r9
+       subq    $16,%r10
+       testq   $-16,%r10
+       jnz     .Lcbc_slow_enc_loop
+       testq   $15,%r10
+       jnz     .Lcbc_slow_enc_tail
+       movq    56(%rsp),%rbp
+       movl    %eax,0(%rbp)
+       movl    %ebx,4(%rbp)
+       movl    %ecx,8(%rbp)
+       movl    %edx,12(%rbp)
+
+       jmp     .Lcbc_exit
+
+.align 4
+.Lcbc_slow_enc_tail:
+       movq    %rax,%r11
+       movq    %rcx,%r12
+       movq    %r10,%rcx
+       movq    %r8,%rsi
+       movq    %r9,%rdi
+.long  0x9066A4F3              
+       movq    $16,%rcx
+       subq    %r10,%rcx
+       xorq    %rax,%rax
+.long  0x9066AAF3              
+       movq    %r9,%r8
+       movq    $16,%r10
+       movq    %r11,%rax
+       movq    %r12,%rcx
+       jmp     .Lcbc_slow_enc_loop     
+
+.align 16
+.LSLOW_DECRYPT:
+       shrq    $3,%rax
+       addq    %rax,%r14
+
+       movq    0(%rbp),%r11
+       movq    8(%rbp),%r12
+       movq    %r11,0+64(%rsp)
+       movq    %r12,8+64(%rsp)
+
+.align 4
+.Lcbc_slow_dec_loop:
+       movl    0(%r8),%eax
+       movl    4(%r8),%ebx
+       movl    8(%r8),%ecx
+       movl    12(%r8),%edx
+       movq    0(%rsp),%r15
+       movq    %r8,24(%rsp)
+       movq    %r9,32(%rsp)
+       movq    %r10,40(%rsp)
+
+       call    _x86_64_AES_decrypt_compact
+
+       movq    24(%rsp),%r8
+       movq    32(%rsp),%r9
+       movq    40(%rsp),%r10
+       xorl    0+64(%rsp),%eax
+       xorl    4+64(%rsp),%ebx
+       xorl    8+64(%rsp),%ecx
+       xorl    12+64(%rsp),%edx
+
+       movq    0(%r8),%r11
+       movq    8(%r8),%r12
+       subq    $16,%r10
+       jc      .Lcbc_slow_dec_partial
+       jz      .Lcbc_slow_dec_done
+
+       movq    %r11,0+64(%rsp)
+       movq    %r12,8+64(%rsp)
+
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       leaq    16(%r8),%r8
+       leaq    16(%r9),%r9
+       jmp     .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+       movq    56(%rsp),%rdi
+       movq    %r11,0(%rdi)
+       movq    %r12,8(%rdi)
+
+       movl    %eax,0(%r9)
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       jmp     .Lcbc_exit
+
+.align 4
+.Lcbc_slow_dec_partial:
+       movq    56(%rsp),%rdi
+       movq    %r11,0(%rdi)
+       movq    %r12,8(%rdi)
+
+       movl    %eax,0+64(%rsp)
+       movl    %ebx,4+64(%rsp)
+       movl    %ecx,8+64(%rsp)
+       movl    %edx,12+64(%rsp)
+
+       movq    %r9,%rdi
+       leaq    64(%rsp),%rsi
+       leaq    16(%r10),%rcx
+.long  0x9066A4F3      
+       jmp     .Lcbc_exit
+
+.align 16
+.Lcbc_exit:
+       movq    16(%rsp),%rsi
+       movq    (%rsi),%r15
+       movq    8(%rsi),%r14
+       movq    16(%rsi),%r13
+       movq    24(%rsi),%r12
+       movq    32(%rsi),%rbp
+       movq    40(%rsi),%rbx
+       leaq    48(%rsi),%rsp
+.Lcbc_popfq:
+       popfq
+.Lcbc_epilogue:
+       .byte   0xf3,0xc3
+.size  AES_cbc_encrypt,.-AES_cbc_encrypt
+.align 64
+.LAES_Te:
+.long  0xa56363c6,0xa56363c6
+.long  0x847c7cf8,0x847c7cf8
+.long  0x997777ee,0x997777ee
+.long  0x8d7b7bf6,0x8d7b7bf6
+.long  0x0df2f2ff,0x0df2f2ff
+.long  0xbd6b6bd6,0xbd6b6bd6
+.long  0xb16f6fde,0xb16f6fde
+.long  0x54c5c591,0x54c5c591
+.long  0x50303060,0x50303060
+.long  0x03010102,0x03010102
+.long  0xa96767ce,0xa96767ce
+.long  0x7d2b2b56,0x7d2b2b56
+.long  0x19fefee7,0x19fefee7
+.long  0x62d7d7b5,0x62d7d7b5
+.long  0xe6abab4d,0xe6abab4d
+.long  0x9a7676ec,0x9a7676ec
+.long  0x45caca8f,0x45caca8f
+.long  0x9d82821f,0x9d82821f
+.long  0x40c9c989,0x40c9c989
+.long  0x877d7dfa,0x877d7dfa
+.long  0x15fafaef,0x15fafaef
+.long  0xeb5959b2,0xeb5959b2
+.long  0xc947478e,0xc947478e
+.long  0x0bf0f0fb,0x0bf0f0fb
+.long  0xecadad41,0xecadad41
+.long  0x67d4d4b3,0x67d4d4b3
+.long  0xfda2a25f,0xfda2a25f
+.long  0xeaafaf45,0xeaafaf45
+.long  0xbf9c9c23,0xbf9c9c23
+.long  0xf7a4a453,0xf7a4a453
+.long  0x967272e4,0x967272e4
+.long  0x5bc0c09b,0x5bc0c09b
+.long  0xc2b7b775,0xc2b7b775
+.long  0x1cfdfde1,0x1cfdfde1
+.long  0xae93933d,0xae93933d
+.long  0x6a26264c,0x6a26264c
+.long  0x5a36366c,0x5a36366c
+.long  0x413f3f7e,0x413f3f7e
+.long  0x02f7f7f5,0x02f7f7f5
+.long  0x4fcccc83,0x4fcccc83
+.long  0x5c343468,0x5c343468
+.long  0xf4a5a551,0xf4a5a551
+.long  0x34e5e5d1,0x34e5e5d1
+.long  0x08f1f1f9,0x08f1f1f9
+.long  0x937171e2,0x937171e2
+.long  0x73d8d8ab,0x73d8d8ab
+.long  0x53313162,0x53313162
+.long  0x3f15152a,0x3f15152a
+.long  0x0c040408,0x0c040408
+.long  0x52c7c795,0x52c7c795
+.long  0x65232346,0x65232346
+.long  0x5ec3c39d,0x5ec3c39d
+.long  0x28181830,0x28181830
+.long  0xa1969637,0xa1969637
+.long  0x0f05050a,0x0f05050a
+.long  0xb59a9a2f,0xb59a9a2f
+.long  0x0907070e,0x0907070e
+.long  0x36121224,0x36121224
+.long  0x9b80801b,0x9b80801b
+.long  0x3de2e2df,0x3de2e2df
+.long  0x26ebebcd,0x26ebebcd
+.long  0x6927274e,0x6927274e
+.long  0xcdb2b27f,0xcdb2b27f
+.long  0x9f7575ea,0x9f7575ea
+.long  0x1b090912,0x1b090912
+.long  0x9e83831d,0x9e83831d
+.long  0x742c2c58,0x742c2c58
+.long  0x2e1a1a34,0x2e1a1a34
+.long  0x2d1b1b36,0x2d1b1b36
+.long  0xb26e6edc,0xb26e6edc
+.long  0xee5a5ab4,0xee5a5ab4
+.long  0xfba0a05b,0xfba0a05b
+.long  0xf65252a4,0xf65252a4
+.long  0x4d3b3b76,0x4d3b3b76
+.long  0x61d6d6b7,0x61d6d6b7
+.long  0xceb3b37d,0xceb3b37d
+.long  0x7b292952,0x7b292952
+.long  0x3ee3e3dd,0x3ee3e3dd
+.long  0x712f2f5e,0x712f2f5e
+.long  0x97848413,0x97848413
+.long  0xf55353a6,0xf55353a6
+.long  0x68d1d1b9,0x68d1d1b9
+.long  0x00000000,0x00000000
+.long  0x2cededc1,0x2cededc1
+.long  0x60202040,0x60202040
+.long  0x1ffcfce3,0x1ffcfce3
+.long  0xc8b1b179,0xc8b1b179
+.long  0xed5b5bb6,0xed5b5bb6
+.long  0xbe6a6ad4,0xbe6a6ad4
+.long  0x46cbcb8d,0x46cbcb8d
+.long  0xd9bebe67,0xd9bebe67
+.long  0x4b393972,0x4b393972
+.long  0xde4a4a94,0xde4a4a94
+.long  0xd44c4c98,0xd44c4c98
+.long  0xe85858b0,0xe85858b0
+.long  0x4acfcf85,0x4acfcf85
+.long  0x6bd0d0bb,0x6bd0d0bb
+.long  0x2aefefc5,0x2aefefc5
+.long  0xe5aaaa4f,0xe5aaaa4f
+.long  0x16fbfbed,0x16fbfbed
+.long  0xc5434386,0xc5434386
+.long  0xd74d4d9a,0xd74d4d9a
+.long  0x55333366,0x55333366
+.long  0x94858511,0x94858511
+.long  0xcf45458a,0xcf45458a
+.long  0x10f9f9e9,0x10f9f9e9
+.long  0x06020204,0x06020204
+.long  0x817f7ffe,0x817f7ffe
+.long  0xf05050a0,0xf05050a0
+.long  0x443c3c78,0x443c3c78
+.long  0xba9f9f25,0xba9f9f25
+.long  0xe3a8a84b,0xe3a8a84b
+.long  0xf35151a2,0xf35151a2
+.long  0xfea3a35d,0xfea3a35d
+.long  0xc0404080,0xc0404080
+.long  0x8a8f8f05,0x8a8f8f05
+.long  0xad92923f,0xad92923f
+.long  0xbc9d9d21,0xbc9d9d21
+.long  0x48383870,0x48383870
+.long  0x04f5f5f1,0x04f5f5f1
+.long  0xdfbcbc63,0xdfbcbc63
+.long  0xc1b6b677,0xc1b6b677
+.long  0x75dadaaf,0x75dadaaf
+.long  0x63212142,0x63212142
+.long  0x30101020,0x30101020
+.long  0x1affffe5,0x1affffe5
+.long  0x0ef3f3fd,0x0ef3f3fd
+.long  0x6dd2d2bf,0x6dd2d2bf
+.long  0x4ccdcd81,0x4ccdcd81
+.long  0x140c0c18,0x140c0c18
+.long  0x35131326,0x35131326
+.long  0x2fececc3,0x2fececc3
+.long  0xe15f5fbe,0xe15f5fbe
+.long  0xa2979735,0xa2979735
+.long  0xcc444488,0xcc444488
+.long  0x3917172e,0x3917172e
+.long  0x57c4c493,0x57c4c493
+.long  0xf2a7a755,0xf2a7a755
+.long  0x827e7efc,0x827e7efc
+.long  0x473d3d7a,0x473d3d7a
+.long  0xac6464c8,0xac6464c8
+.long  0xe75d5dba,0xe75d5dba
+.long  0x2b191932,0x2b191932
+.long  0x957373e6,0x957373e6
+.long  0xa06060c0,0xa06060c0
+.long  0x98818119,0x98818119
+.long  0xd14f4f9e,0xd14f4f9e
+.long  0x7fdcdca3,0x7fdcdca3
+.long  0x66222244,0x66222244
+.long  0x7e2a2a54,0x7e2a2a54
+.long  0xab90903b,0xab90903b
+.long  0x8388880b,0x8388880b
+.long  0xca46468c,0xca46468c
+.long  0x29eeeec7,0x29eeeec7
+.long  0xd3b8b86b,0xd3b8b86b
+.long  0x3c141428,0x3c141428
+.long  0x79dedea7,0x79dedea7
+.long  0xe25e5ebc,0xe25e5ebc
+.long  0x1d0b0b16,0x1d0b0b16
+.long  0x76dbdbad,0x76dbdbad
+.long  0x3be0e0db,0x3be0e0db
+.long  0x56323264,0x56323264
+.long  0x4e3a3a74,0x4e3a3a74
+.long  0x1e0a0a14,0x1e0a0a14
+.long  0xdb494992,0xdb494992
+.long  0x0a06060c,0x0a06060c
+.long  0x6c242448,0x6c242448
+.long  0xe45c5cb8,0xe45c5cb8
+.long  0x5dc2c29f,0x5dc2c29f
+.long  0x6ed3d3bd,0x6ed3d3bd
+.long  0xefacac43,0xefacac43
+.long  0xa66262c4,0xa66262c4
+.long  0xa8919139,0xa8919139
+.long  0xa4959531,0xa4959531
+.long  0x37e4e4d3,0x37e4e4d3
+.long  0x8b7979f2,0x8b7979f2
+.long  0x32e7e7d5,0x32e7e7d5
+.long  0x43c8c88b,0x43c8c88b
+.long  0x5937376e,0x5937376e
+.long  0xb76d6dda,0xb76d6dda
+.long  0x8c8d8d01,0x8c8d8d01
+.long  0x64d5d5b1,0x64d5d5b1
+.long  0xd24e4e9c,0xd24e4e9c
+.long  0xe0a9a949,0xe0a9a949
+.long  0xb46c6cd8,0xb46c6cd8
+.long  0xfa5656ac,0xfa5656ac
+.long  0x07f4f4f3,0x07f4f4f3
+.long  0x25eaeacf,0x25eaeacf
+.long  0xaf6565ca,0xaf6565ca
+.long  0x8e7a7af4,0x8e7a7af4
+.long  0xe9aeae47,0xe9aeae47
+.long  0x18080810,0x18080810
+.long  0xd5baba6f,0xd5baba6f
+.long  0x887878f0,0x887878f0
+.long  0x6f25254a,0x6f25254a
+.long  0x722e2e5c,0x722e2e5c
+.long  0x241c1c38,0x241c1c38
+.long  0xf1a6a657,0xf1a6a657
+.long  0xc7b4b473,0xc7b4b473
+.long  0x51c6c697,0x51c6c697
+.long  0x23e8e8cb,0x23e8e8cb
+.long  0x7cdddda1,0x7cdddda1
+.long  0x9c7474e8,0x9c7474e8
+.long  0x211f1f3e,0x211f1f3e
+.long  0xdd4b4b96,0xdd4b4b96
+.long  0xdcbdbd61,0xdcbdbd61
+.long  0x868b8b0d,0x868b8b0d
+.long  0x858a8a0f,0x858a8a0f
+.long  0x907070e0,0x907070e0
+.long  0x423e3e7c,0x423e3e7c
+.long  0xc4b5b571,0xc4b5b571
+.long  0xaa6666cc,0xaa6666cc
+.long  0xd8484890,0xd8484890
+.long  0x05030306,0x05030306
+.long  0x01f6f6f7,0x01f6f6f7
+.long  0x120e0e1c,0x120e0e1c
+.long  0xa36161c2,0xa36161c2
+.long  0x5f35356a,0x5f35356a
+.long  0xf95757ae,0xf95757ae
+.long  0xd0b9b969,0xd0b9b969
+.long  0x91868617,0x91868617
+.long  0x58c1c199,0x58c1c199
+.long  0x271d1d3a,0x271d1d3a
+.long  0xb99e9e27,0xb99e9e27
+.long  0x38e1e1d9,0x38e1e1d9
+.long  0x13f8f8eb,0x13f8f8eb
+.long  0xb398982b,0xb398982b
+.long  0x33111122,0x33111122
+.long  0xbb6969d2,0xbb6969d2
+.long  0x70d9d9a9,0x70d9d9a9
+.long  0x898e8e07,0x898e8e07
+.long  0xa7949433,0xa7949433
+.long  0xb69b9b2d,0xb69b9b2d
+.long  0x221e1e3c,0x221e1e3c
+.long  0x92878715,0x92878715
+.long  0x20e9e9c9,0x20e9e9c9
+.long  0x49cece87,0x49cece87
+.long  0xff5555aa,0xff5555aa
+.long  0x78282850,0x78282850
+.long  0x7adfdfa5,0x7adfdfa5
+.long  0x8f8c8c03,0x8f8c8c03
+.long  0xf8a1a159,0xf8a1a159
+.long  0x80898909,0x80898909
+.long  0x170d0d1a,0x170d0d1a
+.long  0xdabfbf65,0xdabfbf65
+.long  0x31e6e6d7,0x31e6e6d7
+.long  0xc6424284,0xc6424284
+.long  0xb86868d0,0xb86868d0
+.long  0xc3414182,0xc3414182
+.long  0xb0999929,0xb0999929
+.long  0x772d2d5a,0x772d2d5a
+.long  0x110f0f1e,0x110f0f1e
+.long  0xcbb0b07b,0xcbb0b07b
+.long  0xfc5454a8,0xfc5454a8
+.long  0xd6bbbb6d,0xd6bbbb6d
+.long  0x3a16162c,0x3a16162c
+.byte  0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte  0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte  0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte  0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte  0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte  0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte  0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte  0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte  0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte  0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte  0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte  0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte  0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte  0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte  0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte  0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte  0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte  0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte  0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte  0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte  0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte  0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte  0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte  0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte  0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte  0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte  0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte  0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte  0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte  0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte  0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte  0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte  0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte  0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte  0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte  0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte  0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte  0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte  0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte  0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte  0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte  0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte  0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte  0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte  0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte  0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte  0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte  0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte  0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte  0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte  0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte  0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte  0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte  0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte  0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte  0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte  0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte  0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte  0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte  0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte  0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte  0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte  0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte  0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte  0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte  0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte  0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte  0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte  0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte  0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte  0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte  0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte  0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte  0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte  0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte  0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte  0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte  0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte  0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte  0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte  0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte  0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte  0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte  0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte  0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte  0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte  0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte  0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte  0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte  0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte  0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte  0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte  0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte  0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte  0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte  0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte  0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte  0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte  0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte  0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte  0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte  0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte  0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte  0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte  0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte  0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte  0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte  0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte  0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte  0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte  0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte  0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte  0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte  0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte  0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte  0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte  0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte  0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte  0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte  0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte  0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte  0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte  0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte  0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte  0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte  0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte  0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte  0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long  0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long  0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long  0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long  0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.align 64
+.LAES_Td:
+.long  0x50a7f451,0x50a7f451
+.long  0x5365417e,0x5365417e
+.long  0xc3a4171a,0xc3a4171a
+.long  0x965e273a,0x965e273a
+.long  0xcb6bab3b,0xcb6bab3b
+.long  0xf1459d1f,0xf1459d1f
+.long  0xab58faac,0xab58faac
+.long  0x9303e34b,0x9303e34b
+.long  0x55fa3020,0x55fa3020
+.long  0xf66d76ad,0xf66d76ad
+.long  0x9176cc88,0x9176cc88
+.long  0x254c02f5,0x254c02f5
+.long  0xfcd7e54f,0xfcd7e54f
+.long  0xd7cb2ac5,0xd7cb2ac5
+.long  0x80443526,0x80443526
+.long  0x8fa362b5,0x8fa362b5
+.long  0x495ab1de,0x495ab1de
+.long  0x671bba25,0x671bba25
+.long  0x980eea45,0x980eea45
+.long  0xe1c0fe5d,0xe1c0fe5d
+.long  0x02752fc3,0x02752fc3
+.long  0x12f04c81,0x12f04c81
+.long  0xa397468d,0xa397468d
+.long  0xc6f9d36b,0xc6f9d36b
+.long  0xe75f8f03,0xe75f8f03
+.long  0x959c9215,0x959c9215
+.long  0xeb7a6dbf,0xeb7a6dbf
+.long  0xda595295,0xda595295
+.long  0x2d83bed4,0x2d83bed4
+.long  0xd3217458,0xd3217458
+.long  0x2969e049,0x2969e049
+.long  0x44c8c98e,0x44c8c98e
+.long  0x6a89c275,0x6a89c275
+.long  0x78798ef4,0x78798ef4
+.long  0x6b3e5899,0x6b3e5899
+.long  0xdd71b927,0xdd71b927
+.long  0xb64fe1be,0xb64fe1be
+.long  0x17ad88f0,0x17ad88f0
+.long  0x66ac20c9,0x66ac20c9
+.long  0xb43ace7d,0xb43ace7d
+.long  0x184adf63,0x184adf63
+.long  0x82311ae5,0x82311ae5
+.long  0x60335197,0x60335197
+.long  0x457f5362,0x457f5362
+.long  0xe07764b1,0xe07764b1
+.long  0x84ae6bbb,0x84ae6bbb
+.long  0x1ca081fe,0x1ca081fe
+.long  0x942b08f9,0x942b08f9
+.long  0x58684870,0x58684870
+.long  0x19fd458f,0x19fd458f
+.long  0x876cde94,0x876cde94
+.long  0xb7f87b52,0xb7f87b52
+.long  0x23d373ab,0x23d373ab
+.long  0xe2024b72,0xe2024b72
+.long  0x578f1fe3,0x578f1fe3
+.long  0x2aab5566,0x2aab5566
+.long  0x0728ebb2,0x0728ebb2
+.long  0x03c2b52f,0x03c2b52f
+.long  0x9a7bc586,0x9a7bc586
+.long  0xa50837d3,0xa50837d3
+.long  0xf2872830,0xf2872830
+.long  0xb2a5bf23,0xb2a5bf23
+.long  0xba6a0302,0xba6a0302
+.long  0x5c8216ed,0x5c8216ed
+.long  0x2b1ccf8a,0x2b1ccf8a
+.long  0x92b479a7,0x92b479a7
+.long  0xf0f207f3,0xf0f207f3
+.long  0xa1e2694e,0xa1e2694e
+.long  0xcdf4da65,0xcdf4da65
+.long  0xd5be0506,0xd5be0506
+.long  0x1f6234d1,0x1f6234d1
+.long  0x8afea6c4,0x8afea6c4
+.long  0x9d532e34,0x9d532e34
+.long  0xa055f3a2,0xa055f3a2
+.long  0x32e18a05,0x32e18a05
+.long  0x75ebf6a4,0x75ebf6a4
+.long  0x39ec830b,0x39ec830b
+.long  0xaaef6040,0xaaef6040
+.long  0x069f715e,0x069f715e
+.long  0x51106ebd,0x51106ebd
+.long  0xf98a213e,0xf98a213e
+.long  0x3d06dd96,0x3d06dd96
+.long  0xae053edd,0xae053edd
+.long  0x46bde64d,0x46bde64d
+.long  0xb58d5491,0xb58d5491
+.long  0x055dc471,0x055dc471
+.long  0x6fd40604,0x6fd40604
+.long  0xff155060,0xff155060
+.long  0x24fb9819,0x24fb9819
+.long  0x97e9bdd6,0x97e9bdd6
+.long  0xcc434089,0xcc434089
+.long  0x779ed967,0x779ed967
+.long  0xbd42e8b0,0xbd42e8b0
+.long  0x888b8907,0x888b8907
+.long  0x385b19e7,0x385b19e7
+.long  0xdbeec879,0xdbeec879
+.long  0x470a7ca1,0x470a7ca1
+.long  0xe90f427c,0xe90f427c
+.long  0xc91e84f8,0xc91e84f8
+.long  0x00000000,0x00000000
+.long  0x83868009,0x83868009
+.long  0x48ed2b32,0x48ed2b32
+.long  0xac70111e,0xac70111e
+.long  0x4e725a6c,0x4e725a6c
+.long  0xfbff0efd,0xfbff0efd
+.long  0x5638850f,0x5638850f
+.long  0x1ed5ae3d,0x1ed5ae3d
+.long  0x27392d36,0x27392d36
+.long  0x64d90f0a,0x64d90f0a
+.long  0x21a65c68,0x21a65c68
+.long  0xd1545b9b,0xd1545b9b
+.long  0x3a2e3624,0x3a2e3624
+.long  0xb1670a0c,0xb1670a0c
+.long  0x0fe75793,0x0fe75793
+.long  0xd296eeb4,0xd296eeb4
+.long  0x9e919b1b,0x9e919b1b
+.long  0x4fc5c080,0x4fc5c080
+.long  0xa220dc61,0xa220dc61
+.long  0x694b775a,0x694b775a
+.long  0x161a121c,0x161a121c
+.long  0x0aba93e2,0x0aba93e2
+.long  0xe52aa0c0,0xe52aa0c0
+.long  0x43e0223c,0x43e0223c
+.long  0x1d171b12,0x1d171b12
+.long  0x0b0d090e,0x0b0d090e
+.long  0xadc78bf2,0xadc78bf2
+.long  0xb9a8b62d,0xb9a8b62d
+.long  0xc8a91e14,0xc8a91e14
+.long  0x8519f157,0x8519f157
+.long  0x4c0775af,0x4c0775af
+.long  0xbbdd99ee,0xbbdd99ee
+.long  0xfd607fa3,0xfd607fa3
+.long  0x9f2601f7,0x9f2601f7
+.long  0xbcf5725c,0xbcf5725c
+.long  0xc53b6644,0xc53b6644
+.long  0x347efb5b,0x347efb5b
+.long  0x7629438b,0x7629438b
+.long  0xdcc623cb,0xdcc623cb
+.long  0x68fcedb6,0x68fcedb6
+.long  0x63f1e4b8,0x63f1e4b8
+.long  0xcadc31d7,0xcadc31d7
+.long  0x10856342,0x10856342
+.long  0x40229713,0x40229713
+.long  0x2011c684,0x2011c684
+.long  0x7d244a85,0x7d244a85
+.long  0xf83dbbd2,0xf83dbbd2
+.long  0x1132f9ae,0x1132f9ae
+.long  0x6da129c7,0x6da129c7
+.long  0x4b2f9e1d,0x4b2f9e1d
+.long  0xf330b2dc,0xf330b2dc
+.long  0xec52860d,0xec52860d
+.long  0xd0e3c177,0xd0e3c177
+.long  0x6c16b32b,0x6c16b32b
+.long  0x99b970a9,0x99b970a9
+.long  0xfa489411,0xfa489411
+.long  0x2264e947,0x2264e947
+.long  0xc48cfca8,0xc48cfca8
+.long  0x1a3ff0a0,0x1a3ff0a0
+.long  0xd82c7d56,0xd82c7d56
+.long  0xef903322,0xef903322
+.long  0xc74e4987,0xc74e4987
+.long  0xc1d138d9,0xc1d138d9
+.long  0xfea2ca8c,0xfea2ca8c
+.long  0x360bd498,0x360bd498
+.long  0xcf81f5a6,0xcf81f5a6
+.long  0x28de7aa5,0x28de7aa5
+.long  0x268eb7da,0x268eb7da
+.long  0xa4bfad3f,0xa4bfad3f
+.long  0xe49d3a2c,0xe49d3a2c
+.long  0x0d927850,0x0d927850
+.long  0x9bcc5f6a,0x9bcc5f6a
+.long  0x62467e54,0x62467e54
+.long  0xc2138df6,0xc2138df6
+.long  0xe8b8d890,0xe8b8d890
+.long  0x5ef7392e,0x5ef7392e
+.long  0xf5afc382,0xf5afc382
+.long  0xbe805d9f,0xbe805d9f
+.long  0x7c93d069,0x7c93d069
+.long  0xa92dd56f,0xa92dd56f
+.long  0xb31225cf,0xb31225cf
+.long  0x3b99acc8,0x3b99acc8
+.long  0xa77d1810,0xa77d1810
+.long  0x6e639ce8,0x6e639ce8
+.long  0x7bbb3bdb,0x7bbb3bdb
+.long  0x097826cd,0x097826cd
+.long  0xf418596e,0xf418596e
+.long  0x01b79aec,0x01b79aec
+.long  0xa89a4f83,0xa89a4f83
+.long  0x656e95e6,0x656e95e6
+.long  0x7ee6ffaa,0x7ee6ffaa
+.long  0x08cfbc21,0x08cfbc21
+.long  0xe6e815ef,0xe6e815ef
+.long  0xd99be7ba,0xd99be7ba
+.long  0xce366f4a,0xce366f4a
+.long  0xd4099fea,0xd4099fea
+.long  0xd67cb029,0xd67cb029
+.long  0xafb2a431,0xafb2a431
+.long  0x31233f2a,0x31233f2a
+.long  0x3094a5c6,0x3094a5c6
+.long  0xc066a235,0xc066a235
+.long  0x37bc4e74,0x37bc4e74
+.long  0xa6ca82fc,0xa6ca82fc
+.long  0xb0d090e0,0xb0d090e0
+.long  0x15d8a733,0x15d8a733
+.long  0x4a9804f1,0x4a9804f1
+.long  0xf7daec41,0xf7daec41
+.long  0x0e50cd7f,0x0e50cd7f
+.long  0x2ff69117,0x2ff69117
+.long  0x8dd64d76,0x8dd64d76
+.long  0x4db0ef43,0x4db0ef43
+.long  0x544daacc,0x544daacc
+.long  0xdf0496e4,0xdf0496e4
+.long  0xe3b5d19e,0xe3b5d19e
+.long  0x1b886a4c,0x1b886a4c
+.long  0xb81f2cc1,0xb81f2cc1
+.long  0x7f516546,0x7f516546
+.long  0x04ea5e9d,0x04ea5e9d
+.long  0x5d358c01,0x5d358c01
+.long  0x737487fa,0x737487fa
+.long  0x2e410bfb,0x2e410bfb
+.long  0x5a1d67b3,0x5a1d67b3
+.long  0x52d2db92,0x52d2db92
+.long  0x335610e9,0x335610e9
+.long  0x1347d66d,0x1347d66d
+.long  0x8c61d79a,0x8c61d79a
+.long  0x7a0ca137,0x7a0ca137
+.long  0x8e14f859,0x8e14f859
+.long  0x893c13eb,0x893c13eb
+.long  0xee27a9ce,0xee27a9ce
+.long  0x35c961b7,0x35c961b7
+.long  0xede51ce1,0xede51ce1
+.long  0x3cb1477a,0x3cb1477a
+.long  0x59dfd29c,0x59dfd29c
+.long  0x3f73f255,0x3f73f255
+.long  0x79ce1418,0x79ce1418
+.long  0xbf37c773,0xbf37c773
+.long  0xeacdf753,0xeacdf753
+.long  0x5baafd5f,0x5baafd5f
+.long  0x146f3ddf,0x146f3ddf
+.long  0x86db4478,0x86db4478
+.long  0x81f3afca,0x81f3afca
+.long  0x3ec468b9,0x3ec468b9
+.long  0x2c342438,0x2c342438
+.long  0x5f40a3c2,0x5f40a3c2
+.long  0x72c31d16,0x72c31d16
+.long  0x0c25e2bc,0x0c25e2bc
+.long  0x8b493c28,0x8b493c28
+.long  0x41950dff,0x41950dff
+.long  0x7101a839,0x7101a839
+.long  0xdeb30c08,0xdeb30c08
+.long  0x9ce4b4d8,0x9ce4b4d8
+.long  0x90c15664,0x90c15664
+.long  0x6184cb7b,0x6184cb7b
+.long  0x70b632d5,0x70b632d5
+.long  0x745c6c48,0x745c6c48
+.long  0x4257b8d0,0x4257b8d0
+.byte  0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte  0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte  0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte  0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte  0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte  0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte  0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte  0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte  0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte  0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte  0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte  0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte  0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte  0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte  0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte  0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte  0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte  0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte  0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte  0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte  0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte  0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte  0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte  0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte  0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte  0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte  0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte  0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte  0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte  0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte  0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte  0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long  0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long  0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte  0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte  0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte  0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte  0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte  0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte  0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte  0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte  0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte  0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte  0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte  0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte  0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte  0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte  0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte  0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte  0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte  0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte  0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte  0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte  0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte  0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte  0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte  0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte  0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte  0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte  0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte  0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte  0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte  0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte  0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte  0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte  0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long  0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long  0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte  0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte  0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte  0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte  0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte  0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte  0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte  0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte  0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte  0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte  0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte  0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte  0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte  0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte  0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte  0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte  0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte  0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte  0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte  0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte  0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte  0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte  0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte  0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte  0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte  0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte  0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte  0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte  0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte  0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte  0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte  0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte  0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long  0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long  0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte  0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte  0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte  0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte  0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte  0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte  0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte  0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte  0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte  0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte  0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte  0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte  0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte  0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte  0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte  0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte  0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte  0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte  0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte  0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte  0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte  0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte  0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte  0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte  0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte  0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte  0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte  0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte  0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte  0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte  0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte  0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte  0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long  0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long  0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte  65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/asm/bf-586.s b/secure/lib/libcrypto/asm/bf-586.s
new file mode 100644 (file)
index 0000000..aa718d4
--- /dev/null
@@ -0,0 +1,896 @@
+.file  "bf-586.s"
+.text
+.globl BF_encrypt
+.type  BF_encrypt,@function
+.align 16
+BF_encrypt:
+.L_BF_encrypt_begin:
+
+       pushl   %ebp
+       pushl   %ebx
+       movl    12(%esp),%ebx
+       movl    16(%esp),%ebp
+       pushl   %esi
+       pushl   %edi
+
+       movl    (%ebx),%edi
+       movl    4(%ebx),%esi
+       xorl    %eax,%eax
+       movl    (%ebp),%ebx
+       xorl    %ecx,%ecx
+       xorl    %ebx,%edi
+
+
+       movl    4(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    8(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    12(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    16(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    20(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    24(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    28(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    32(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    36(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    40(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    44(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    48(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    52(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    56(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    60(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    64(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+
+       movl    20(%esp),%eax
+       xorl    %ebx,%edi
+       movl    68(%ebp),%edx
+       xorl    %edx,%esi
+       movl    %edi,4(%eax)
+       movl    %esi,(%eax)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  BF_encrypt,.-.L_BF_encrypt_begin
+.globl BF_decrypt
+.type  BF_decrypt,@function
+.align 16
+BF_decrypt:
+.L_BF_decrypt_begin:
+
+       pushl   %ebp
+       pushl   %ebx
+       movl    12(%esp),%ebx
+       movl    16(%esp),%ebp
+       pushl   %esi
+       pushl   %edi
+
+       movl    (%ebx),%edi
+       movl    4(%ebx),%esi
+       xorl    %eax,%eax
+       movl    68(%ebp),%ebx
+       xorl    %ecx,%ecx
+       xorl    %ebx,%edi
+
+
+       movl    64(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    60(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    56(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    52(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    48(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    44(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    40(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    36(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    32(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    28(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    24(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    20(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    16(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    12(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%edi
+
+
+       movl    8(%ebp),%edx
+       movl    %edi,%ebx
+       xorl    %edx,%esi
+       shrl    $16,%ebx
+       movl    %edi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+       xorl    %eax,%eax
+       xorl    %ebx,%esi
+
+
+       movl    4(%ebp),%edx
+       movl    %esi,%ebx
+       xorl    %edx,%edi
+       shrl    $16,%ebx
+       movl    %esi,%edx
+       movb    %bh,%al
+       andl    $255,%ebx
+       movb    %dh,%cl
+       andl    $255,%edx
+       movl    72(%ebp,%eax,4),%eax
+       movl    1096(%ebp,%ebx,4),%ebx
+       addl    %eax,%ebx
+       movl    2120(%ebp,%ecx,4),%eax
+       xorl    %eax,%ebx
+       movl    3144(%ebp,%edx,4),%edx
+       addl    %edx,%ebx
+
+       movl    20(%esp),%eax
+       xorl    %ebx,%edi
+       movl    (%ebp),%edx
+       xorl    %edx,%esi
+       movl    %edi,4(%eax)
+       movl    %esi,(%eax)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  BF_decrypt,.-.L_BF_decrypt_begin
+.globl BF_cbc_encrypt
+.type  BF_cbc_encrypt,@function
+.align 16
+BF_cbc_encrypt:
+.L_BF_cbc_encrypt_begin:
+
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    28(%esp),%ebp
+
+       movl    36(%esp),%ebx
+       movl    (%ebx),%esi
+       movl    4(%ebx),%edi
+       pushl   %edi
+       pushl   %esi
+       pushl   %edi
+       pushl   %esi
+       movl    %esp,%ebx
+       movl    36(%esp),%esi
+       movl    40(%esp),%edi
+
+       movl    56(%esp),%ecx
+
+       movl    48(%esp),%eax
+       pushl   %eax
+       pushl   %ebx
+       cmpl    $0,%ecx
+       jz      .L000decrypt
+       andl    $4294967288,%ebp
+       movl    8(%esp),%eax
+       movl    12(%esp),%ebx
+       jz      .L001encrypt_finish
+.L002encrypt_loop:
+       movl    (%esi),%ecx
+       movl    4(%esi),%edx
+       xorl    %ecx,%eax
+       xorl    %edx,%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,8(%esp)
+       movl    %ebx,12(%esp)
+       call    .L_BF_encrypt_begin
+       movl    8(%esp),%eax
+       movl    12(%esp),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       addl    $8,%esi
+       addl    $8,%edi
+       subl    $8,%ebp
+       jnz     .L002encrypt_loop
+.L001encrypt_finish:
+       movl    52(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L003finish
+       call    .L004PIC_point
+.L004PIC_point:
+       popl    %edx
+       leal    .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx
+       movl    (%ecx,%ebp,4),%ebp
+       addl    %edx,%ebp
+       xorl    %ecx,%ecx
+       xorl    %edx,%edx
+       jmp     *%ebp
+.L006ej7:
+       movb    6(%esi),%dh
+       shll    $8,%edx
+.L007ej6:
+       movb    5(%esi),%dh
+.L008ej5:
+       movb    4(%esi),%dl
+.L009ej4:
+       movl    (%esi),%ecx
+       jmp     .L010ejend
+.L011ej3:
+       movb    2(%esi),%ch
+       shll    $8,%ecx
+.L012ej2:
+       movb    1(%esi),%ch
+.L013ej1:
+       movb    (%esi),%cl
+.L010ejend:
+       xorl    %ecx,%eax
+       xorl    %edx,%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,8(%esp)
+       movl    %ebx,12(%esp)
+       call    .L_BF_encrypt_begin
+       movl    8(%esp),%eax
+       movl    12(%esp),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       jmp     .L003finish
+.L000decrypt:
+       andl    $4294967288,%ebp
+       movl    16(%esp),%eax
+       movl    20(%esp),%ebx
+       jz      .L014decrypt_finish
+.L015decrypt_loop:
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,8(%esp)
+       movl    %ebx,12(%esp)
+       call    .L_BF_decrypt_begin
+       movl    8(%esp),%eax
+       movl    12(%esp),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    16(%esp),%ecx
+       movl    20(%esp),%edx
+       xorl    %eax,%ecx
+       xorl    %ebx,%edx
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    %ecx,(%edi)
+       movl    %edx,4(%edi)
+       movl    %eax,16(%esp)
+       movl    %ebx,20(%esp)
+       addl    $8,%esi
+       addl    $8,%edi
+       subl    $8,%ebp
+       jnz     .L015decrypt_loop
+.L014decrypt_finish:
+       movl    52(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L003finish
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    %eax,8(%esp)
+       movl    %ebx,12(%esp)
+       call    .L_BF_decrypt_begin
+       movl    8(%esp),%eax
+       movl    12(%esp),%ebx
+       bswap   %eax
+       bswap   %ebx
+       movl    16(%esp),%ecx
+       movl    20(%esp),%edx
+       xorl    %eax,%ecx
+       xorl    %ebx,%edx
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+.L016dj7:
+       rorl    $16,%edx
+       movb    %dl,6(%edi)
+       shrl    $16,%edx
+.L017dj6:
+       movb    %dh,5(%edi)
+.L018dj5:
+       movb    %dl,4(%edi)
+.L019dj4:
+       movl    %ecx,(%edi)
+       jmp     .L020djend
+.L021dj3:
+       rorl    $16,%ecx
+       movb    %cl,2(%edi)
+       shll    $16,%ecx
+.L022dj2:
+       movb    %ch,1(%esi)
+.L023dj1:
+       movb    %cl,(%esi)
+.L020djend:
+       jmp     .L003finish
+.L003finish:
+       movl    60(%esp),%ecx
+       addl    $24,%esp
+       movl    %eax,(%ecx)
+       movl    %ebx,4(%ecx)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.align 64
+.L005cbc_enc_jmp_table:
+.long  0
+.long  .L013ej1-.L004PIC_point
+.long  .L012ej2-.L004PIC_point
+.long  .L011ej3-.L004PIC_point
+.long  .L009ej4-.L004PIC_point
+.long  .L008ej5-.L004PIC_point
+.long  .L007ej6-.L004PIC_point
+.long  .L006ej7-.L004PIC_point
+.align 64
+.size  BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin
diff --git a/secure/lib/libcrypto/asm/bn-586.s b/secure/lib/libcrypto/asm/bn-586.s
new file mode 100644 (file)
index 0000000..d071c2f
--- /dev/null
@@ -0,0 +1,1521 @@
+.file  "../../../../crypto/openssl/crypto/bn/asm/bn-586.s"
+.text
+.globl bn_mul_add_words
+.type  bn_mul_add_words,@function
+.align 16
+bn_mul_add_words:
+.L_bn_mul_add_words_begin:
+       leal    OPENSSL_ia32cap_P,%eax
+       btl     $26,(%eax)
+       jnc     .L000maw_non_sse2
+       movl    4(%esp),%eax
+       movl    8(%esp),%edx
+       movl    12(%esp),%ecx
+       movd    16(%esp),%mm0
+       pxor    %mm1,%mm1
+       jmp     .L001maw_sse2_entry
+.align 16
+.L002maw_sse2_unrolled:
+       movd    (%eax),%mm3
+       paddq   %mm3,%mm1
+       movd    (%edx),%mm2
+       pmuludq %mm0,%mm2
+       movd    4(%edx),%mm4
+       pmuludq %mm0,%mm4
+       movd    8(%edx),%mm6
+       pmuludq %mm0,%mm6
+       movd    12(%edx),%mm7
+       pmuludq %mm0,%mm7
+       paddq   %mm2,%mm1
+       movd    4(%eax),%mm3
+       paddq   %mm4,%mm3
+       movd    8(%eax),%mm5
+       paddq   %mm6,%mm5
+       movd    12(%eax),%mm4
+       paddq   %mm4,%mm7
+       movd    %mm1,(%eax)
+       movd    16(%edx),%mm2
+       pmuludq %mm0,%mm2
+       psrlq   $32,%mm1
+       movd    20(%edx),%mm4
+       pmuludq %mm0,%mm4
+       paddq   %mm3,%mm1
+       movd    24(%edx),%mm6
+       pmuludq %mm0,%mm6
+       movd    %mm1,4(%eax)
+       psrlq   $32,%mm1
+       movd    28(%edx),%mm3
+       addl    $32,%edx
+       pmuludq %mm0,%mm3
+       paddq   %mm5,%mm1
+       movd    16(%eax),%mm5
+       paddq   %mm5,%mm2
+       movd    %mm1,8(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm7,%mm1
+       movd    20(%eax),%mm5
+       paddq   %mm5,%mm4
+       movd    %mm1,12(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm2,%mm1
+       movd    24(%eax),%mm5
+       paddq   %mm5,%mm6
+       movd    %mm1,16(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm4,%mm1
+       movd    28(%eax),%mm5
+       paddq   %mm5,%mm3
+       movd    %mm1,20(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm6,%mm1
+       movd    %mm1,24(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm3,%mm1
+       movd    %mm1,28(%eax)
+       leal    32(%eax),%eax
+       psrlq   $32,%mm1
+       subl    $8,%ecx
+       jz      .L003maw_sse2_exit
+.L001maw_sse2_entry:
+       testl   $4294967288,%ecx
+       jnz     .L002maw_sse2_unrolled
+.align 4
+.L004maw_sse2_loop:
+       movd    (%edx),%mm2
+       movd    (%eax),%mm3
+       pmuludq %mm0,%mm2
+       leal    4(%edx),%edx
+       paddq   %mm3,%mm1
+       paddq   %mm2,%mm1
+       movd    %mm1,(%eax)
+       subl    $1,%ecx
+       psrlq   $32,%mm1
+       leal    4(%eax),%eax
+       jnz     .L004maw_sse2_loop
+.L003maw_sse2_exit:
+       movd    %mm1,%eax
+       emms
+       ret
+.align 16
+.L000maw_non_sse2:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       xorl    %esi,%esi
+       movl    20(%esp),%edi
+       movl    28(%esp),%ecx
+       movl    24(%esp),%ebx
+       andl    $4294967288,%ecx
+       movl    32(%esp),%ebp
+       pushl   %ecx
+       jz      .L005maw_finish
+.align 16
+.L006maw_loop:
+
+       movl    (%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    (%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,(%edi)
+       movl    %edx,%esi
+
+       movl    4(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    4(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,4(%edi)
+       movl    %edx,%esi
+
+       movl    8(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    8(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,8(%edi)
+       movl    %edx,%esi
+
+       movl    12(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    12(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,12(%edi)
+       movl    %edx,%esi
+
+       movl    16(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    16(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,16(%edi)
+       movl    %edx,%esi
+
+       movl    20(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    20(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,20(%edi)
+       movl    %edx,%esi
+
+       movl    24(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    24(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,24(%edi)
+       movl    %edx,%esi
+
+       movl    28(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    28(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,28(%edi)
+       movl    %edx,%esi
+
+       subl    $8,%ecx
+       leal    32(%ebx),%ebx
+       leal    32(%edi),%edi
+       jnz     .L006maw_loop
+.L005maw_finish:
+       movl    32(%esp),%ecx
+       andl    $7,%ecx
+       jnz     .L007maw_finish2
+       jmp     .L008maw_end
+.L007maw_finish2:
+
+       movl    (%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    (%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    4(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    4(%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,4(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    8(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    8(%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,8(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    12(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    12(%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,12(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    16(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    16(%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,16(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    20(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    20(%edi),%eax
+       adcl    $0,%edx
+       decl    %ecx
+       movl    %eax,20(%edi)
+       movl    %edx,%esi
+       jz      .L008maw_end
+
+       movl    24(%ebx),%eax
+       mull    %ebp
+       addl    %esi,%eax
+       adcl    $0,%edx
+       addl    24(%edi),%eax
+       adcl    $0,%edx
+       movl    %eax,24(%edi)
+       movl    %edx,%esi
+.L008maw_end:
+       movl    %esi,%eax
+       popl    %ecx
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_mul_add_words,.-.L_bn_mul_add_words_begin
+.globl bn_mul_words
+.type  bn_mul_words,@function
+.align 16
+bn_mul_words:
+.L_bn_mul_words_begin:
+       leal    OPENSSL_ia32cap_P,%eax
+       btl     $26,(%eax)
+       jnc     .L009mw_non_sse2
+       movl    4(%esp),%eax
+       movl    8(%esp),%edx
+       movl    12(%esp),%ecx
+       movd    16(%esp),%mm0
+       pxor    %mm1,%mm1
+.align 16
+.L010mw_sse2_loop:
+       movd    (%edx),%mm2
+       pmuludq %mm0,%mm2
+       leal    4(%edx),%edx
+       paddq   %mm2,%mm1
+       movd    %mm1,(%eax)
+       subl    $1,%ecx
+       psrlq   $32,%mm1
+       leal    4(%eax),%eax
+       jnz     .L010mw_sse2_loop
+       movd    %mm1,%eax
+       emms
+       ret
+.align 16
+.L009mw_non_sse2:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       xorl    %esi,%esi
+       movl    20(%esp),%edi
+       movl    24(%esp),%ebx
+       movl    28(%esp),%ebp
+       movl    32(%esp),%ecx
+       andl    $4294967288,%ebp
+       jz      .L011mw_finish
+.L012mw_loop:
+
+       movl    (%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,(%edi)
+       movl    %edx,%esi
+
+       movl    4(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,4(%edi)
+       movl    %edx,%esi
+
+       movl    8(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,8(%edi)
+       movl    %edx,%esi
+
+       movl    12(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,12(%edi)
+       movl    %edx,%esi
+
+       movl    16(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,16(%edi)
+       movl    %edx,%esi
+
+       movl    20(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,20(%edi)
+       movl    %edx,%esi
+
+       movl    24(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,24(%edi)
+       movl    %edx,%esi
+
+       movl    28(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,28(%edi)
+       movl    %edx,%esi
+
+       addl    $32,%ebx
+       addl    $32,%edi
+       subl    $8,%ebp
+       jz      .L011mw_finish
+       jmp     .L012mw_loop
+.L011mw_finish:
+       movl    28(%esp),%ebp
+       andl    $7,%ebp
+       jnz     .L013mw_finish2
+       jmp     .L014mw_end
+.L013mw_finish2:
+
+       movl    (%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    4(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,4(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    8(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,8(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    12(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,12(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    16(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,16(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    20(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,20(%edi)
+       movl    %edx,%esi
+       decl    %ebp
+       jz      .L014mw_end
+
+       movl    24(%ebx),%eax
+       mull    %ecx
+       addl    %esi,%eax
+       adcl    $0,%edx
+       movl    %eax,24(%edi)
+       movl    %edx,%esi
+.L014mw_end:
+       movl    %esi,%eax
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_mul_words,.-.L_bn_mul_words_begin
+.globl bn_sqr_words
+.type  bn_sqr_words,@function
+.align 16
+bn_sqr_words:
+.L_bn_sqr_words_begin:
+       leal    OPENSSL_ia32cap_P,%eax
+       btl     $26,(%eax)
+       jnc     .L015sqr_non_sse2
+       movl    4(%esp),%eax
+       movl    8(%esp),%edx
+       movl    12(%esp),%ecx
+.align 16
+.L016sqr_sse2_loop:
+       movd    (%edx),%mm0
+       pmuludq %mm0,%mm0
+       leal    4(%edx),%edx
+       movq    %mm0,(%eax)
+       subl    $1,%ecx
+       leal    8(%eax),%eax
+       jnz     .L016sqr_sse2_loop
+       emms
+       ret
+.align 16
+.L015sqr_non_sse2:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%ebx
+       andl    $4294967288,%ebx
+       jz      .L017sw_finish
+.L018sw_loop:
+
+       movl    (%edi),%eax
+       mull    %eax
+       movl    %eax,(%esi)
+       movl    %edx,4(%esi)
+
+       movl    4(%edi),%eax
+       mull    %eax
+       movl    %eax,8(%esi)
+       movl    %edx,12(%esi)
+
+       movl    8(%edi),%eax
+       mull    %eax
+       movl    %eax,16(%esi)
+       movl    %edx,20(%esi)
+
+       movl    12(%edi),%eax
+       mull    %eax
+       movl    %eax,24(%esi)
+       movl    %edx,28(%esi)
+
+       movl    16(%edi),%eax
+       mull    %eax
+       movl    %eax,32(%esi)
+       movl    %edx,36(%esi)
+
+       movl    20(%edi),%eax
+       mull    %eax
+       movl    %eax,40(%esi)
+       movl    %edx,44(%esi)
+
+       movl    24(%edi),%eax
+       mull    %eax
+       movl    %eax,48(%esi)
+       movl    %edx,52(%esi)
+
+       movl    28(%edi),%eax
+       mull    %eax
+       movl    %eax,56(%esi)
+       movl    %edx,60(%esi)
+
+       addl    $32,%edi
+       addl    $64,%esi
+       subl    $8,%ebx
+       jnz     .L018sw_loop
+.L017sw_finish:
+       movl    28(%esp),%ebx
+       andl    $7,%ebx
+       jz      .L019sw_end
+
+       movl    (%edi),%eax
+       mull    %eax
+       movl    %eax,(%esi)
+       decl    %ebx
+       movl    %edx,4(%esi)
+       jz      .L019sw_end
+
+       movl    4(%edi),%eax
+       mull    %eax
+       movl    %eax,8(%esi)
+       decl    %ebx
+       movl    %edx,12(%esi)
+       jz      .L019sw_end
+
+       movl    8(%edi),%eax
+       mull    %eax
+       movl    %eax,16(%esi)
+       decl    %ebx
+       movl    %edx,20(%esi)
+       jz      .L019sw_end
+
+       movl    12(%edi),%eax
+       mull    %eax
+       movl    %eax,24(%esi)
+       decl    %ebx
+       movl    %edx,28(%esi)
+       jz      .L019sw_end
+
+       movl    16(%edi),%eax
+       mull    %eax
+       movl    %eax,32(%esi)
+       decl    %ebx
+       movl    %edx,36(%esi)
+       jz      .L019sw_end
+
+       movl    20(%edi),%eax
+       mull    %eax
+       movl    %eax,40(%esi)
+       decl    %ebx
+       movl    %edx,44(%esi)
+       jz      .L019sw_end
+
+       movl    24(%edi),%eax
+       mull    %eax
+       movl    %eax,48(%esi)
+       movl    %edx,52(%esi)
+.L019sw_end:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_sqr_words,.-.L_bn_sqr_words_begin
+.globl bn_div_words
+.type  bn_div_words,@function
+.align 16
+bn_div_words:
+.L_bn_div_words_begin:
+       movl    4(%esp),%edx
+       movl    8(%esp),%eax
+       movl    12(%esp),%ecx
+       divl    %ecx
+       ret
+.size  bn_div_words,.-.L_bn_div_words_begin
+.globl bn_add_words
+.type  bn_add_words,@function
+.align 16
+bn_add_words:
+.L_bn_add_words_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       movl    20(%esp),%ebx
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       movl    32(%esp),%ebp
+       xorl    %eax,%eax
+       andl    $4294967288,%ebp
+       jz      .L020aw_finish
+.L021aw_loop:
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+
+       movl    4(%esi),%ecx
+       movl    4(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,4(%ebx)
+
+       movl    8(%esi),%ecx
+       movl    8(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,8(%ebx)
+
+       movl    12(%esi),%ecx
+       movl    12(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,12(%ebx)
+
+       movl    16(%esi),%ecx
+       movl    16(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,16(%ebx)
+
+       movl    20(%esi),%ecx
+       movl    20(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,20(%ebx)
+
+       movl    24(%esi),%ecx
+       movl    24(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+
+       movl    28(%esi),%ecx
+       movl    28(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,28(%ebx)
+
+       addl    $32,%esi
+       addl    $32,%edi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L021aw_loop
+.L020aw_finish:
+       movl    32(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L022aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,(%ebx)
+       jz      .L022aw_end
+
+       movl    4(%esi),%ecx
+       movl    4(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,4(%ebx)
+       jz      .L022aw_end
+
+       movl    8(%esi),%ecx
+       movl    8(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,8(%ebx)
+       jz      .L022aw_end
+
+       movl    12(%esi),%ecx
+       movl    12(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,12(%ebx)
+       jz      .L022aw_end
+
+       movl    16(%esi),%ecx
+       movl    16(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,16(%ebx)
+       jz      .L022aw_end
+
+       movl    20(%esi),%ecx
+       movl    20(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,20(%ebx)
+       jz      .L022aw_end
+
+       movl    24(%esi),%ecx
+       movl    24(%edi),%edx
+       addl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       addl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+.L022aw_end:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_add_words,.-.L_bn_add_words_begin
+.globl bn_sub_words
+.type  bn_sub_words,@function
+.align 16
+bn_sub_words:
+.L_bn_sub_words_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       movl    20(%esp),%ebx
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       movl    32(%esp),%ebp
+       xorl    %eax,%eax
+       andl    $4294967288,%ebp
+       jz      .L023aw_finish
+.L024aw_loop:
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+
+       movl    4(%esi),%ecx
+       movl    4(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,4(%ebx)
+
+       movl    8(%esi),%ecx
+       movl    8(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,8(%ebx)
+
+       movl    12(%esi),%ecx
+       movl    12(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,12(%ebx)
+
+       movl    16(%esi),%ecx
+       movl    16(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,16(%ebx)
+
+       movl    20(%esi),%ecx
+       movl    20(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,20(%ebx)
+
+       movl    24(%esi),%ecx
+       movl    24(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+
+       movl    28(%esi),%ecx
+       movl    28(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,28(%ebx)
+
+       addl    $32,%esi
+       addl    $32,%edi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L024aw_loop
+.L023aw_finish:
+       movl    32(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L025aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,(%ebx)
+       jz      .L025aw_end
+
+       movl    4(%esi),%ecx
+       movl    4(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,4(%ebx)
+       jz      .L025aw_end
+
+       movl    8(%esi),%ecx
+       movl    8(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,8(%ebx)
+       jz      .L025aw_end
+
+       movl    12(%esi),%ecx
+       movl    12(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,12(%ebx)
+       jz      .L025aw_end
+
+       movl    16(%esi),%ecx
+       movl    16(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,16(%ebx)
+       jz      .L025aw_end
+
+       movl    20(%esi),%ecx
+       movl    20(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,20(%ebx)
+       jz      .L025aw_end
+
+       movl    24(%esi),%ecx
+       movl    24(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+.L025aw_end:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_sub_words,.-.L_bn_sub_words_begin
+.globl bn_sub_part_words
+.type  bn_sub_part_words,@function
+.align 16
+bn_sub_part_words:
+.L_bn_sub_part_words_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       movl    20(%esp),%ebx
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       movl    32(%esp),%ebp
+       xorl    %eax,%eax
+       andl    $4294967288,%ebp
+       jz      .L026aw_finish
+.L027aw_loop:
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+
+       movl    4(%esi),%ecx
+       movl    4(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,4(%ebx)
+
+       movl    8(%esi),%ecx
+       movl    8(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,8(%ebx)
+
+       movl    12(%esi),%ecx
+       movl    12(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,12(%ebx)
+
+       movl    16(%esi),%ecx
+       movl    16(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,16(%ebx)
+
+       movl    20(%esi),%ecx
+       movl    20(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,20(%ebx)
+
+       movl    24(%esi),%ecx
+       movl    24(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+
+       movl    28(%esi),%ecx
+       movl    28(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,28(%ebx)
+
+       addl    $32,%esi
+       addl    $32,%edi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L027aw_loop
+.L026aw_finish:
+       movl    32(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+       decl    %ebp
+       jz      .L028aw_end
+
+       movl    (%esi),%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+       addl    $4,%esi
+       addl    $4,%edi
+       addl    $4,%ebx
+.L028aw_end:
+       cmpl    $0,36(%esp)
+       je      .L029pw_end
+       movl    36(%esp),%ebp
+       cmpl    $0,%ebp
+       je      .L029pw_end
+       jge     .L030pw_pos
+
+       movl    $0,%edx
+       subl    %ebp,%edx
+       movl    %edx,%ebp
+       andl    $4294967288,%ebp
+       jz      .L031pw_neg_finish
+.L032pw_neg_loop:
+
+       movl    $0,%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,(%ebx)
+
+       movl    $0,%ecx
+       movl    4(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,4(%ebx)
+
+       movl    $0,%ecx
+       movl    8(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,8(%ebx)
+
+       movl    $0,%ecx
+       movl    12(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,12(%ebx)
+
+       movl    $0,%ecx
+       movl    16(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,16(%ebx)
+
+       movl    $0,%ecx
+       movl    20(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,20(%ebx)
+
+       movl    $0,%ecx
+       movl    24(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+
+       movl    $0,%ecx
+       movl    28(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,28(%ebx)
+
+       addl    $32,%edi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L032pw_neg_loop
+.L031pw_neg_finish:
+       movl    36(%esp),%edx
+       movl    $0,%ebp
+       subl    %edx,%ebp
+       andl    $7,%ebp
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    (%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    4(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,4(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    8(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,8(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    12(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,12(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    16(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,16(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    20(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       decl    %ebp
+       movl    %ecx,20(%ebx)
+       jz      .L029pw_end
+
+       movl    $0,%ecx
+       movl    24(%edi),%edx
+       subl    %eax,%ecx
+       movl    $0,%eax
+       adcl    %eax,%eax
+       subl    %edx,%ecx
+       adcl    $0,%eax
+       movl    %ecx,24(%ebx)
+       jmp     .L029pw_end
+.L030pw_pos:
+       andl    $4294967288,%ebp
+       jz      .L033pw_pos_finish
+.L034pw_pos_loop:
+
+       movl    (%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,(%ebx)
+       jnc     .L035pw_nc0
+
+       movl    4(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,4(%ebx)
+       jnc     .L036pw_nc1
+
+       movl    8(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,8(%ebx)
+       jnc     .L037pw_nc2
+
+       movl    12(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,12(%ebx)
+       jnc     .L038pw_nc3
+
+       movl    16(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,16(%ebx)
+       jnc     .L039pw_nc4
+
+       movl    20(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,20(%ebx)
+       jnc     .L040pw_nc5
+
+       movl    24(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,24(%ebx)
+       jnc     .L041pw_nc6
+
+       movl    28(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,28(%ebx)
+       jnc     .L042pw_nc7
+
+       addl    $32,%esi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L034pw_pos_loop
+.L033pw_pos_finish:
+       movl    36(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L029pw_end
+
+       movl    (%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,(%ebx)
+       jnc     .L043pw_tail_nc0
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    4(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,4(%ebx)
+       jnc     .L044pw_tail_nc1
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    8(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,8(%ebx)
+       jnc     .L045pw_tail_nc2
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    12(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,12(%ebx)
+       jnc     .L046pw_tail_nc3
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    16(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,16(%ebx)
+       jnc     .L047pw_tail_nc4
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    20(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,20(%ebx)
+       jnc     .L048pw_tail_nc5
+       decl    %ebp
+       jz      .L029pw_end
+
+       movl    24(%esi),%ecx
+       subl    %eax,%ecx
+       movl    %ecx,24(%ebx)
+       jnc     .L049pw_tail_nc6
+       movl    $1,%eax
+       jmp     .L029pw_end
+.L050pw_nc_loop:
+       movl    (%esi),%ecx
+       movl    %ecx,(%ebx)
+.L035pw_nc0:
+       movl    4(%esi),%ecx
+       movl    %ecx,4(%ebx)
+.L036pw_nc1:
+       movl    8(%esi),%ecx
+       movl    %ecx,8(%ebx)
+.L037pw_nc2:
+       movl    12(%esi),%ecx
+       movl    %ecx,12(%ebx)
+.L038pw_nc3:
+       movl    16(%esi),%ecx
+       movl    %ecx,16(%ebx)
+.L039pw_nc4:
+       movl    20(%esi),%ecx
+       movl    %ecx,20(%ebx)
+.L040pw_nc5:
+       movl    24(%esi),%ecx
+       movl    %ecx,24(%ebx)
+.L041pw_nc6:
+       movl    28(%esi),%ecx
+       movl    %ecx,28(%ebx)
+.L042pw_nc7:
+
+       addl    $32,%esi
+       addl    $32,%ebx
+       subl    $8,%ebp
+       jnz     .L050pw_nc_loop
+       movl    36(%esp),%ebp
+       andl    $7,%ebp
+       jz      .L051pw_nc_end
+       movl    (%esi),%ecx
+       movl    %ecx,(%ebx)
+.L043pw_tail_nc0:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    4(%esi),%ecx
+       movl    %ecx,4(%ebx)
+.L044pw_tail_nc1:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    8(%esi),%ecx
+       movl    %ecx,8(%ebx)
+.L045pw_tail_nc2:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    12(%esi),%ecx
+       movl    %ecx,12(%ebx)
+.L046pw_tail_nc3:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    16(%esi),%ecx
+       movl    %ecx,16(%ebx)
+.L047pw_tail_nc4:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    20(%esi),%ecx
+       movl    %ecx,20(%ebx)
+.L048pw_tail_nc5:
+       decl    %ebp
+       jz      .L051pw_nc_end
+       movl    24(%esi),%ecx
+       movl    %ecx,24(%ebx)
+.L049pw_tail_nc6:
+.L051pw_nc_end:
+       movl    $0,%eax
+.L029pw_end:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  bn_sub_part_words,.-.L_bn_sub_part_words_begin
+.comm  OPENSSL_ia32cap_P,4,4
diff --git a/secure/lib/libcrypto/asm/cmll-x86.s b/secure/lib/libcrypto/asm/cmll-x86.s
new file mode 100644 (file)
index 0000000..a896314
--- /dev/null
+++ b/secure/lib/libcrypto/asm/cmll-x86.s
@@ -0,0 +1,2375 @@
+.file  "cmll-586.s"
+.text
+.globl Camellia_EncryptBlock_Rounds
+.type  Camellia_EncryptBlock_Rounds,@function
+.align 16
+Camellia_EncryptBlock_Rounds:
+.L_Camellia_EncryptBlock_Rounds_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%eax
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%ebx
+       subl    $28,%esp
+       andl    $-64,%esp
+       leal    -127(%edi),%ecx
+       subl    %esp,%ecx
+       negl    %ecx
+       andl    $960,%ecx
+       subl    %ecx,%esp
+       addl    $4,%esp
+       shll    $6,%eax
+       leal    (%edi,%eax,1),%eax
+       movl    %ebx,20(%esp)
+       movl    %eax,16(%esp)
+       call    .L000pic_point
+.L000pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L000pic_point(%ebp),%ebp
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       bswap   %eax
+       movl    12(%esi),%edx
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       call    _x86_Camellia_encrypt
+       movl    20(%esp),%esp
+       bswap   %eax
+       movl    32(%esp),%esi
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  Camellia_EncryptBlock_Rounds,.-.L_Camellia_EncryptBlock_Rounds_begin
+.globl Camellia_EncryptBlock
+.type  Camellia_EncryptBlock,@function
+.align 16
+Camellia_EncryptBlock:
+.L_Camellia_EncryptBlock_begin:
+       movl    $128,%eax
+       subl    4(%esp),%eax
+       movl    $3,%eax
+       adcl    $0,%eax
+       movl    %eax,4(%esp)
+       jmp     .L_Camellia_EncryptBlock_Rounds_begin
+.size  Camellia_EncryptBlock,.-.L_Camellia_EncryptBlock_begin
+.globl Camellia_encrypt
+.type  Camellia_encrypt,@function
+.align 16
+Camellia_encrypt:
+.L_Camellia_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%ebx
+       subl    $28,%esp
+       andl    $-64,%esp
+       movl    272(%edi),%eax
+       leal    -127(%edi),%ecx
+       subl    %esp,%ecx
+       negl    %ecx
+       andl    $960,%ecx
+       subl    %ecx,%esp
+       addl    $4,%esp
+       shll    $6,%eax
+       leal    (%edi,%eax,1),%eax
+       movl    %ebx,20(%esp)
+       movl    %eax,16(%esp)
+       call    .L001pic_point
+.L001pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L001pic_point(%ebp),%ebp
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       bswap   %eax
+       movl    12(%esi),%edx
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       call    _x86_Camellia_encrypt
+       movl    20(%esp),%esp
+       bswap   %eax
+       movl    24(%esp),%esi
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  Camellia_encrypt,.-.L_Camellia_encrypt_begin
+.type  _x86_Camellia_encrypt,@function
+.align 16
+_x86_Camellia_encrypt:
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    16(%edi),%esi
+       movl    %eax,4(%esp)
+       movl    %ebx,8(%esp)
+       movl    %ecx,12(%esp)
+       movl    %edx,16(%esp)
+.align 16
+.L002loop:
+       xorl    %esi,%eax
+       xorl    20(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    24(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    28(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    32(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       xorl    %esi,%eax
+       xorl    36(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    40(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    44(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    48(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       xorl    %esi,%eax
+       xorl    52(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    56(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    60(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    64(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       addl    $64,%edi
+       cmpl    20(%esp),%edi
+       je      .L003done
+       andl    %eax,%esi
+       movl    16(%esp),%edx
+       roll    $1,%esi
+       movl    %edx,%ecx
+       xorl    %esi,%ebx
+       orl     12(%edi),%ecx
+       movl    %ebx,8(%esp)
+       xorl    12(%esp),%ecx
+       movl    4(%edi),%esi
+       movl    %ecx,12(%esp)
+       orl     %ebx,%esi
+       andl    8(%edi),%ecx
+       xorl    %esi,%eax
+       roll    $1,%ecx
+       movl    %eax,4(%esp)
+       xorl    %ecx,%edx
+       movl    16(%edi),%esi
+       movl    %edx,16(%esp)
+       jmp     .L002loop
+.align 8
+.L003done:
+       movl    %eax,%ecx
+       movl    %ebx,%edx
+       movl    12(%esp),%eax
+       movl    16(%esp),%ebx
+       xorl    %esi,%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       ret
+.size  _x86_Camellia_encrypt,.-_x86_Camellia_encrypt
+.globl Camellia_DecryptBlock_Rounds
+.type  Camellia_DecryptBlock_Rounds,@function
+.align 16
+Camellia_DecryptBlock_Rounds:
+.L_Camellia_DecryptBlock_Rounds_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%eax
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%ebx
+       subl    $28,%esp
+       andl    $-64,%esp
+       leal    -127(%edi),%ecx
+       subl    %esp,%ecx
+       negl    %ecx
+       andl    $960,%ecx
+       subl    %ecx,%esp
+       addl    $4,%esp
+       shll    $6,%eax
+       movl    %edi,16(%esp)
+       leal    (%edi,%eax,1),%edi
+       movl    %ebx,20(%esp)
+       call    .L004pic_point
+.L004pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L004pic_point(%ebp),%ebp
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       bswap   %eax
+       movl    12(%esi),%edx
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       call    _x86_Camellia_decrypt
+       movl    20(%esp),%esp
+       bswap   %eax
+       movl    32(%esp),%esi
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  Camellia_DecryptBlock_Rounds,.-.L_Camellia_DecryptBlock_Rounds_begin
+.globl Camellia_DecryptBlock
+.type  Camellia_DecryptBlock,@function
+.align 16
+Camellia_DecryptBlock:
+.L_Camellia_DecryptBlock_begin:
+       movl    $128,%eax
+       subl    4(%esp),%eax
+       movl    $3,%eax
+       adcl    $0,%eax
+       movl    %eax,4(%esp)
+       jmp     .L_Camellia_DecryptBlock_Rounds_begin
+.size  Camellia_DecryptBlock,.-.L_Camellia_DecryptBlock_begin
+.globl Camellia_decrypt
+.type  Camellia_decrypt,@function
+.align 16
+Camellia_decrypt:
+.L_Camellia_decrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    28(%esp),%edi
+       movl    %esp,%ebx
+       subl    $28,%esp
+       andl    $-64,%esp
+       movl    272(%edi),%eax
+       leal    -127(%edi),%ecx
+       subl    %esp,%ecx
+       negl    %ecx
+       andl    $960,%ecx
+       subl    %ecx,%esp
+       addl    $4,%esp
+       shll    $6,%eax
+       movl    %edi,16(%esp)
+       leal    (%edi,%eax,1),%edi
+       movl    %ebx,20(%esp)
+       call    .L005pic_point
+.L005pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L005pic_point(%ebp),%ebp
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       bswap   %eax
+       movl    12(%esi),%edx
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       call    _x86_Camellia_decrypt
+       movl    20(%esp),%esp
+       bswap   %eax
+       movl    24(%esp),%esi
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  Camellia_decrypt,.-.L_Camellia_decrypt_begin
+.type  _x86_Camellia_decrypt,@function
+.align 16
+_x86_Camellia_decrypt:
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+       movl    -8(%edi),%esi
+       movl    %eax,4(%esp)
+       movl    %ebx,8(%esp)
+       movl    %ecx,12(%esp)
+       movl    %edx,16(%esp)
+.align 16
+.L006loop:
+       xorl    %esi,%eax
+       xorl    -4(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    -16(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    -12(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    -24(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       xorl    %esi,%eax
+       xorl    -20(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    -32(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    -28(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    -40(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       xorl    %esi,%eax
+       xorl    -36(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    16(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    12(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    -48(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,16(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,12(%esp)
+       xorl    %esi,%ecx
+       xorl    -44(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    8(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    4(%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    -56(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,8(%esp)
+       xorl    %edx,%eax
+       movl    %eax,4(%esp)
+       subl    $64,%edi
+       cmpl    20(%esp),%edi
+       je      .L007done
+       andl    %eax,%esi
+       movl    16(%esp),%edx
+       roll    $1,%esi
+       movl    %edx,%ecx
+       xorl    %esi,%ebx
+       orl     4(%edi),%ecx
+       movl    %ebx,8(%esp)
+       xorl    12(%esp),%ecx
+       movl    12(%edi),%esi
+       movl    %ecx,12(%esp)
+       orl     %ebx,%esi
+       andl    (%edi),%ecx
+       xorl    %esi,%eax
+       roll    $1,%ecx
+       movl    %eax,4(%esp)
+       xorl    %ecx,%edx
+       movl    -8(%edi),%esi
+       movl    %edx,16(%esp)
+       jmp     .L006loop
+.align 8
+.L007done:
+       movl    %eax,%ecx
+       movl    %ebx,%edx
+       movl    12(%esp),%eax
+       movl    16(%esp),%ebx
+       xorl    %esi,%ecx
+       xorl    12(%edi),%edx
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       ret
+.size  _x86_Camellia_decrypt,.-_x86_Camellia_decrypt
+.globl Camellia_Ekeygen
+.type  Camellia_Ekeygen,@function
+.align 16
+Camellia_Ekeygen:
+.L_Camellia_Ekeygen_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       subl    $16,%esp
+       movl    36(%esp),%ebp
+       movl    40(%esp),%esi
+       movl    44(%esp),%edi
+       movl    (%esi),%eax
+       movl    4(%esi),%ebx
+       movl    8(%esi),%ecx
+       movl    12(%esi),%edx
+       bswap   %eax
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,(%edi)
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       cmpl    $128,%ebp
+       je      .L0081st128
+       movl    16(%esi),%eax
+       movl    20(%esi),%ebx
+       cmpl    $192,%ebp
+       je      .L0091st192
+       movl    24(%esi),%ecx
+       movl    28(%esi),%edx
+       jmp     .L0101st256
+.align 4
+.L0091st192:
+       movl    %eax,%ecx
+       movl    %ebx,%edx
+       notl    %ecx
+       notl    %edx
+.align 4
+.L0101st256:
+       bswap   %eax
+       bswap   %ebx
+       bswap   %ecx
+       bswap   %edx
+       movl    %eax,32(%edi)
+       movl    %ebx,36(%edi)
+       movl    %ecx,40(%edi)
+       movl    %edx,44(%edi)
+       xorl    (%edi),%eax
+       xorl    4(%edi),%ebx
+       xorl    8(%edi),%ecx
+       xorl    12(%edi),%edx
+.align 4
+.L0081st128:
+       call    .L011pic_point
+.L011pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L011pic_point(%ebp),%ebp
+       leal    .LCamellia_SIGMA-.LCamellia_SBOX(%ebp),%edi
+       movl    (%edi),%esi
+       movl    %eax,(%esp)
+       movl    %ebx,4(%esp)
+       movl    %ecx,8(%esp)
+       movl    %edx,12(%esp)
+       xorl    %esi,%eax
+       xorl    4(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    12(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    8(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    8(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,12(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,8(%esp)
+       xorl    %esi,%ecx
+       xorl    12(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    4(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    (%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    16(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,4(%esp)
+       xorl    %edx,%eax
+       movl    %eax,(%esp)
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       movl    44(%esp),%esi
+       xorl    (%esi),%eax
+       xorl    4(%esi),%ebx
+       xorl    8(%esi),%ecx
+       xorl    12(%esi),%edx
+       movl    16(%edi),%esi
+       movl    %eax,(%esp)
+       movl    %ebx,4(%esp)
+       movl    %ecx,8(%esp)
+       movl    %edx,12(%esp)
+       xorl    %esi,%eax
+       xorl    20(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    12(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    8(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    24(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,12(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,8(%esp)
+       xorl    %esi,%ecx
+       xorl    28(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    4(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    (%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    32(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,4(%esp)
+       xorl    %edx,%eax
+       movl    %eax,(%esp)
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       movl    36(%esp),%esi
+       cmpl    $128,%esi
+       jne     .L0122nd256
+       movl    44(%esp),%edi
+       leal    128(%edi),%edi
+       movl    %eax,-112(%edi)
+       movl    %ebx,-108(%edi)
+       movl    %ecx,-104(%edi)
+       movl    %edx,-100(%edi)
+       movl    %eax,%ebp
+       shll    $15,%eax
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       shll    $15,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $15,%ecx
+       movl    %eax,-80(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ebx
+       shrl    $17,%ebp
+       movl    %edx,%esi
+       shrl    $17,%esi
+       movl    %ebx,-76(%edi)
+       shll    $15,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,-72(%edi)
+       movl    %edx,-68(%edi)
+       movl    %eax,%ebp
+       shll    $15,%eax
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       shll    $15,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $15,%ecx
+       movl    %eax,-64(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ebx
+       shrl    $17,%ebp
+       movl    %edx,%esi
+       shrl    $17,%esi
+       movl    %ebx,-60(%edi)
+       shll    $15,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,-56(%edi)
+       movl    %edx,-52(%edi)
+       movl    %eax,%ebp
+       shll    $15,%eax
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       shll    $15,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $15,%ecx
+       movl    %eax,-32(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ebx
+       shrl    $17,%ebp
+       movl    %edx,%esi
+       shrl    $17,%esi
+       movl    %ebx,-28(%edi)
+       shll    $15,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %eax,%ebp
+       shll    $15,%eax
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       shll    $15,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $15,%ecx
+       movl    %eax,-16(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ebx
+       shrl    $17,%ebp
+       movl    %edx,%esi
+       shrl    $17,%esi
+       movl    %ebx,-12(%edi)
+       shll    $15,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,-8(%edi)
+       movl    %edx,-4(%edi)
+       movl    %ebx,%ebp
+       shll    $2,%ebx
+       movl    %ecx,%esi
+       shrl    $30,%esi
+       shll    $2,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $2,%edx
+       movl    %ebx,32(%edi)
+       shrl    $30,%esi
+       orl     %esi,%ecx
+       shrl    $30,%ebp
+       movl    %eax,%esi
+       shrl    $30,%esi
+       movl    %ecx,36(%edi)
+       shll    $2,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,40(%edi)
+       movl    %eax,44(%edi)
+       movl    %ebx,%ebp
+       shll    $17,%ebx
+       movl    %ecx,%esi
+       shrl    $15,%esi
+       shll    $17,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $17,%edx
+       movl    %ebx,64(%edi)
+       shrl    $15,%esi
+       orl     %esi,%ecx
+       shrl    $15,%ebp
+       movl    %eax,%esi
+       shrl    $15,%esi
+       movl    %ecx,68(%edi)
+       shll    $17,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,72(%edi)
+       movl    %eax,76(%edi)
+       movl    -128(%edi),%ebx
+       movl    -124(%edi),%ecx
+       movl    -120(%edi),%edx
+       movl    -116(%edi),%eax
+       movl    %ebx,%ebp
+       shll    $15,%ebx
+       movl    %ecx,%esi
+       shrl    $17,%esi
+       shll    $15,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $15,%edx
+       movl    %ebx,-96(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ecx
+       shrl    $17,%ebp
+       movl    %eax,%esi
+       shrl    $17,%esi
+       movl    %ecx,-92(%edi)
+       shll    $15,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,-88(%edi)
+       movl    %eax,-84(%edi)
+       movl    %ebx,%ebp
+       shll    $30,%ebx
+       movl    %ecx,%esi
+       shrl    $2,%esi
+       shll    $30,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $30,%edx
+       movl    %ebx,-48(%edi)
+       shrl    $2,%esi
+       orl     %esi,%ecx
+       shrl    $2,%ebp
+       movl    %eax,%esi
+       shrl    $2,%esi
+       movl    %ecx,-44(%edi)
+       shll    $30,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,-40(%edi)
+       movl    %eax,-36(%edi)
+       movl    %ebx,%ebp
+       shll    $15,%ebx
+       movl    %ecx,%esi
+       shrl    $17,%esi
+       shll    $15,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $15,%edx
+       shrl    $17,%esi
+       orl     %esi,%ecx
+       shrl    $17,%ebp
+       movl    %eax,%esi
+       shrl    $17,%esi
+       shll    $15,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,-24(%edi)
+       movl    %eax,-20(%edi)
+       movl    %ebx,%ebp
+       shll    $17,%ebx
+       movl    %ecx,%esi
+       shrl    $15,%esi
+       shll    $17,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $17,%edx
+       movl    %ebx,(%edi)
+       shrl    $15,%esi
+       orl     %esi,%ecx
+       shrl    $15,%ebp
+       movl    %eax,%esi
+       shrl    $15,%esi
+       movl    %ecx,4(%edi)
+       shll    $17,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,8(%edi)
+       movl    %eax,12(%edi)
+       movl    %ebx,%ebp
+       shll    $17,%ebx
+       movl    %ecx,%esi
+       shrl    $15,%esi
+       shll    $17,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $17,%edx
+       movl    %ebx,16(%edi)
+       shrl    $15,%esi
+       orl     %esi,%ecx
+       shrl    $15,%ebp
+       movl    %eax,%esi
+       shrl    $15,%esi
+       movl    %ecx,20(%edi)
+       shll    $17,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,24(%edi)
+       movl    %eax,28(%edi)
+       movl    %ebx,%ebp
+       shll    $17,%ebx
+       movl    %ecx,%esi
+       shrl    $15,%esi
+       shll    $17,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $17,%edx
+       movl    %ebx,48(%edi)
+       shrl    $15,%esi
+       orl     %esi,%ecx
+       shrl    $15,%ebp
+       movl    %eax,%esi
+       shrl    $15,%esi
+       movl    %ecx,52(%edi)
+       shll    $17,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,56(%edi)
+       movl    %eax,60(%edi)
+       movl    $3,%eax
+       jmp     .L013done
+.align 16
+.L0122nd256:
+       movl    44(%esp),%esi
+       movl    %eax,48(%esi)
+       movl    %ebx,52(%esi)
+       movl    %ecx,56(%esi)
+       movl    %edx,60(%esi)
+       xorl    32(%esi),%eax
+       xorl    36(%esi),%ebx
+       xorl    40(%esi),%ecx
+       xorl    44(%esi),%edx
+       movl    32(%edi),%esi
+       movl    %eax,(%esp)
+       movl    %ebx,4(%esp)
+       movl    %ecx,8(%esp)
+       movl    %edx,12(%esp)
+       xorl    %esi,%eax
+       xorl    36(%edi),%ebx
+       movzbl  %ah,%esi
+       movl    2052(%ebp,%esi,8),%edx
+       movzbl  %al,%esi
+       xorl    4(%ebp,%esi,8),%edx
+       shrl    $16,%eax
+       movzbl  %bl,%esi
+       movl    (%ebp,%esi,8),%ecx
+       movzbl  %ah,%esi
+       xorl    (%ebp,%esi,8),%edx
+       movzbl  %bh,%esi
+       xorl    4(%ebp,%esi,8),%ecx
+       shrl    $16,%ebx
+       movzbl  %al,%eax
+       xorl    2048(%ebp,%eax,8),%edx
+       movzbl  %bh,%esi
+       movl    12(%esp),%eax
+       xorl    %edx,%ecx
+       rorl    $8,%edx
+       xorl    2048(%ebp,%esi,8),%ecx
+       movzbl  %bl,%esi
+       movl    8(%esp),%ebx
+       xorl    %eax,%edx
+       xorl    2052(%ebp,%esi,8),%ecx
+       movl    40(%edi),%esi
+       xorl    %ecx,%edx
+       movl    %edx,12(%esp)
+       xorl    %ebx,%ecx
+       movl    %ecx,8(%esp)
+       xorl    %esi,%ecx
+       xorl    44(%edi),%edx
+       movzbl  %ch,%esi
+       movl    2052(%ebp,%esi,8),%ebx
+       movzbl  %cl,%esi
+       xorl    4(%ebp,%esi,8),%ebx
+       shrl    $16,%ecx
+       movzbl  %dl,%esi
+       movl    (%ebp,%esi,8),%eax
+       movzbl  %ch,%esi
+       xorl    (%ebp,%esi,8),%ebx
+       movzbl  %dh,%esi
+       xorl    4(%ebp,%esi,8),%eax
+       shrl    $16,%edx
+       movzbl  %cl,%ecx
+       xorl    2048(%ebp,%ecx,8),%ebx
+       movzbl  %dh,%esi
+       movl    4(%esp),%ecx
+       xorl    %ebx,%eax
+       rorl    $8,%ebx
+       xorl    2048(%ebp,%esi,8),%eax
+       movzbl  %dl,%esi
+       movl    (%esp),%edx
+       xorl    %ecx,%ebx
+       xorl    2052(%ebp,%esi,8),%eax
+       movl    48(%edi),%esi
+       xorl    %eax,%ebx
+       movl    %ebx,4(%esp)
+       xorl    %edx,%eax
+       movl    %eax,(%esp)
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       movl    44(%esp),%edi
+       leal    128(%edi),%edi
+       movl    %eax,-112(%edi)
+       movl    %ebx,-108(%edi)
+       movl    %ecx,-104(%edi)
+       movl    %edx,-100(%edi)
+       movl    %eax,%ebp
+       shll    $30,%eax
+       movl    %ebx,%esi
+       shrl    $2,%esi
+       shll    $30,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $30,%ecx
+       movl    %eax,-48(%edi)
+       shrl    $2,%esi
+       orl     %esi,%ebx
+       shrl    $2,%ebp
+       movl    %edx,%esi
+       shrl    $2,%esi
+       movl    %ebx,-44(%edi)
+       shll    $30,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,-40(%edi)
+       movl    %edx,-36(%edi)
+       movl    %eax,%ebp
+       shll    $30,%eax
+       movl    %ebx,%esi
+       shrl    $2,%esi
+       shll    $30,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $30,%ecx
+       movl    %eax,32(%edi)
+       shrl    $2,%esi
+       orl     %esi,%ebx
+       shrl    $2,%ebp
+       movl    %edx,%esi
+       shrl    $2,%esi
+       movl    %ebx,36(%edi)
+       shll    $30,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,40(%edi)
+       movl    %edx,44(%edi)
+       movl    %ebx,%ebp
+       shll    $19,%ebx
+       movl    %ecx,%esi
+       shrl    $13,%esi
+       shll    $19,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $19,%edx
+       movl    %ebx,128(%edi)
+       shrl    $13,%esi
+       orl     %esi,%ecx
+       shrl    $13,%ebp
+       movl    %eax,%esi
+       shrl    $13,%esi
+       movl    %ecx,132(%edi)
+       shll    $19,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,136(%edi)
+       movl    %eax,140(%edi)
+       movl    -96(%edi),%ebx
+       movl    -92(%edi),%ecx
+       movl    -88(%edi),%edx
+       movl    -84(%edi),%eax
+       movl    %ebx,%ebp
+       shll    $15,%ebx
+       movl    %ecx,%esi
+       shrl    $17,%esi
+       shll    $15,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $15,%edx
+       movl    %ebx,-96(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ecx
+       shrl    $17,%ebp
+       movl    %eax,%esi
+       shrl    $17,%esi
+       movl    %ecx,-92(%edi)
+       shll    $15,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,-88(%edi)
+       movl    %eax,-84(%edi)
+       movl    %ebx,%ebp
+       shll    $15,%ebx
+       movl    %ecx,%esi
+       shrl    $17,%esi
+       shll    $15,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $15,%edx
+       movl    %ebx,-64(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ecx
+       shrl    $17,%ebp
+       movl    %eax,%esi
+       shrl    $17,%esi
+       movl    %ecx,-60(%edi)
+       shll    $15,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,-56(%edi)
+       movl    %eax,-52(%edi)
+       movl    %ebx,%ebp
+       shll    $30,%ebx
+       movl    %ecx,%esi
+       shrl    $2,%esi
+       shll    $30,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $30,%edx
+       movl    %ebx,16(%edi)
+       shrl    $2,%esi
+       orl     %esi,%ecx
+       shrl    $2,%ebp
+       movl    %eax,%esi
+       shrl    $2,%esi
+       movl    %ecx,20(%edi)
+       shll    $30,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,24(%edi)
+       movl    %eax,28(%edi)
+       movl    %ecx,%ebp
+       shll    $2,%ecx
+       movl    %edx,%esi
+       shrl    $30,%esi
+       shll    $2,%edx
+       orl     %esi,%ecx
+       movl    %eax,%esi
+       shll    $2,%eax
+       movl    %ecx,80(%edi)
+       shrl    $30,%esi
+       orl     %esi,%edx
+       shrl    $30,%ebp
+       movl    %ebx,%esi
+       shrl    $30,%esi
+       movl    %edx,84(%edi)
+       shll    $2,%ebx
+       orl     %esi,%eax
+       orl     %ebp,%ebx
+       movl    %eax,88(%edi)
+       movl    %ebx,92(%edi)
+       movl    -80(%edi),%ecx
+       movl    -76(%edi),%edx
+       movl    -72(%edi),%eax
+       movl    -68(%edi),%ebx
+       movl    %ecx,%ebp
+       shll    $15,%ecx
+       movl    %edx,%esi
+       shrl    $17,%esi
+       shll    $15,%edx
+       orl     %esi,%ecx
+       movl    %eax,%esi
+       shll    $15,%eax
+       movl    %ecx,-80(%edi)
+       shrl    $17,%esi
+       orl     %esi,%edx
+       shrl    $17,%ebp
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       movl    %edx,-76(%edi)
+       shll    $15,%ebx
+       orl     %esi,%eax
+       orl     %ebp,%ebx
+       movl    %eax,-72(%edi)
+       movl    %ebx,-68(%edi)
+       movl    %ecx,%ebp
+       shll    $30,%ecx
+       movl    %edx,%esi
+       shrl    $2,%esi
+       shll    $30,%edx
+       orl     %esi,%ecx
+       movl    %eax,%esi
+       shll    $30,%eax
+       movl    %ecx,-16(%edi)
+       shrl    $2,%esi
+       orl     %esi,%edx
+       shrl    $2,%ebp
+       movl    %ebx,%esi
+       shrl    $2,%esi
+       movl    %edx,-12(%edi)
+       shll    $30,%ebx
+       orl     %esi,%eax
+       orl     %ebp,%ebx
+       movl    %eax,-8(%edi)
+       movl    %ebx,-4(%edi)
+       movl    %edx,64(%edi)
+       movl    %eax,68(%edi)
+       movl    %ebx,72(%edi)
+       movl    %ecx,76(%edi)
+       movl    %edx,%ebp
+       shll    $17,%edx
+       movl    %eax,%esi
+       shrl    $15,%esi
+       shll    $17,%eax
+       orl     %esi,%edx
+       movl    %ebx,%esi
+       shll    $17,%ebx
+       movl    %edx,96(%edi)
+       shrl    $15,%esi
+       orl     %esi,%eax
+       shrl    $15,%ebp
+       movl    %ecx,%esi
+       shrl    $15,%esi
+       movl    %eax,100(%edi)
+       shll    $17,%ecx
+       orl     %esi,%ebx
+       orl     %ebp,%ecx
+       movl    %ebx,104(%edi)
+       movl    %ecx,108(%edi)
+       movl    -128(%edi),%edx
+       movl    -124(%edi),%eax
+       movl    -120(%edi),%ebx
+       movl    -116(%edi),%ecx
+       movl    %eax,%ebp
+       shll    $13,%eax
+       movl    %ebx,%esi
+       shrl    $19,%esi
+       shll    $13,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $13,%ecx
+       movl    %eax,-32(%edi)
+       shrl    $19,%esi
+       orl     %esi,%ebx
+       shrl    $19,%ebp
+       movl    %edx,%esi
+       shrl    $19,%esi
+       movl    %ebx,-28(%edi)
+       shll    $13,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,-24(%edi)
+       movl    %edx,-20(%edi)
+       movl    %eax,%ebp
+       shll    $15,%eax
+       movl    %ebx,%esi
+       shrl    $17,%esi
+       shll    $15,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $15,%ecx
+       movl    %eax,(%edi)
+       shrl    $17,%esi
+       orl     %esi,%ebx
+       shrl    $17,%ebp
+       movl    %edx,%esi
+       shrl    $17,%esi
+       movl    %ebx,4(%edi)
+       shll    $15,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    %eax,%ebp
+       shll    $17,%eax
+       movl    %ebx,%esi
+       shrl    $15,%esi
+       shll    $17,%ebx
+       orl     %esi,%eax
+       movl    %ecx,%esi
+       shll    $17,%ecx
+       movl    %eax,48(%edi)
+       shrl    $15,%esi
+       orl     %esi,%ebx
+       shrl    $15,%ebp
+       movl    %edx,%esi
+       shrl    $15,%esi
+       movl    %ebx,52(%edi)
+       shll    $17,%edx
+       orl     %esi,%ecx
+       orl     %ebp,%edx
+       movl    %ecx,56(%edi)
+       movl    %edx,60(%edi)
+       movl    %ebx,%ebp
+       shll    $2,%ebx
+       movl    %ecx,%esi
+       shrl    $30,%esi
+       shll    $2,%ecx
+       orl     %esi,%ebx
+       movl    %edx,%esi
+       shll    $2,%edx
+       movl    %ebx,112(%edi)
+       shrl    $30,%esi
+       orl     %esi,%ecx
+       shrl    $30,%ebp
+       movl    %eax,%esi
+       shrl    $30,%esi
+       movl    %ecx,116(%edi)
+       shll    $2,%eax
+       orl     %esi,%edx
+       orl     %ebp,%eax
+       movl    %edx,120(%edi)
+       movl    %eax,124(%edi)
+       movl    $4,%eax
+.L013done:
+       leal    144(%edi),%edx
+       addl    $16,%esp
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  Camellia_Ekeygen,.-.L_Camellia_Ekeygen_begin
+.globl Camellia_set_key
+.type  Camellia_set_key,@function
+.align 16
+Camellia_set_key:
+.L_Camellia_set_key_begin:
+       pushl   %ebx
+       movl    8(%esp),%ecx
+       movl    12(%esp),%ebx
+       movl    16(%esp),%edx
+       movl    $-1,%eax
+       testl   %ecx,%ecx
+       jz      .L014done
+       testl   %edx,%edx
+       jz      .L014done
+       movl    $-2,%eax
+       cmpl    $256,%ebx
+       je      .L015arg_ok
+       cmpl    $192,%ebx
+       je      .L015arg_ok
+       cmpl    $128,%ebx
+       jne     .L014done
+.align 4
+.L015arg_ok:
+       pushl   %edx
+       pushl   %ecx
+       pushl   %ebx
+       call    .L_Camellia_Ekeygen_begin
+       addl    $12,%esp
+       movl    %eax,(%edx)
+       xorl    %eax,%eax
+.align 4
+.L014done:
+       popl    %ebx
+       ret
+.size  Camellia_set_key,.-.L_Camellia_set_key_begin
+.align 64
+.LCamellia_SIGMA:
+.long  2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0
+.align 64
+.LCamellia_SBOX:
+.long  1886416896,1886388336
+.long  2189591040,741081132
+.long  741092352,3014852787
+.long  3974949888,3233808576
+.long  3014898432,3840147684
+.long  656877312,1465319511
+.long  3233857536,3941204202
+.long  3857048832,2930639022
+.long  3840205824,589496355
+.long  2240120064,1802174571
+.long  1465341696,1162149957
+.long  892679424,2779054245
+.long  3941263872,3991732461
+.long  202116096,1330577487
+.long  2930683392,488439837
+.long  1094795520,2459041938
+.long  589505280,2256928902
+.long  4025478912,2947481775
+.long  1802201856,2088501372
+.long  2475922176,522125343
+.long  1162167552,1044250686
+.long  421075200,3705405660
+.long  2779096320,1583218782
+.long  555819264,185270283
+.long  3991792896,2795896998
+.long  235802112,960036921
+.long  1330597632,3587506389
+.long  1313754624,1566376029
+.long  488447232,3654877401
+.long  1701143808,1515847770
+.long  2459079168,1364262993
+.long  3183328512,1819017324
+.long  2256963072,2341142667
+.long  3099113472,2593783962
+.long  2947526400,4227531003
+.long  2408550144,2964324528
+.long  2088532992,1953759348
+.long  3958106880,724238379
+.long  522133248,4042260720
+.long  3469659648,2223243396
+.long  1044266496,3755933919
+.long  808464384,3419078859
+.long  3705461760,875823156
+.long  1600085760,1987444854
+.long  1583242752,1835860077
+.long  3318072576,2846425257
+.long  185273088,3520135377
+.long  437918208,67371012
+.long  2795939328,336855060
+.long  3789676800,976879674
+.long  960051456,3739091166
+.long  3402287616,286326801
+.long  3587560704,842137650
+.long  1195853568,2627469468
+.long  1566399744,1397948499
+.long  1027423488,4075946226
+.long  3654932736,4278059262
+.long  16843008,3486449871
+.long  1515870720,3284336835
+.long  3604403712,2054815866
+.long  1364283648,606339108
+.long  1448498688,3907518696
+.long  1819044864,1616904288
+.long  1296911616,1768489065
+.long  2341178112,2863268010
+.long  218959104,2694840480
+.long  2593823232,2711683233
+.long  1717986816,1650589794
+.long  4227595008,1414791252
+.long  3435973632,505282590
+.long  2964369408,3772776672
+.long  757935360,1684275300
+.long  1953788928,269484048
+.long  303174144,0
+.long  724249344,2745368739
+.long  538976256,1970602101
+.long  4042321920,2324299914
+.long  2981212416,3873833190
+.long  2223277056,151584777
+.long  2576980224,3722248413
+.long  3755990784,2273771655
+.long  1280068608,2206400643
+.long  3419130624,3452764365
+.long  3267543552,2425356432
+.long  875836416,1936916595
+.long  2122219008,4143317238
+.long  1987474944,2644312221
+.long  84215040,3216965823
+.long  1835887872,1381105746
+.long  3082270464,3638034648
+.long  2846468352,3368550600
+.long  825307392,3334865094
+.long  3520188672,2172715137
+.long  387389184,1869545583
+.long  67372032,320012307
+.long  3621246720,1667432547
+.long  336860160,3924361449
+.long  1482184704,2812739751
+.long  976894464,2677997727
+.long  1633771776,3166437564
+.long  3739147776,690552873
+.long  454761216,4193845497
+.long  286331136,791609391
+.long  471604224,3031695540
+.long  842150400,2021130360
+.long  252645120,101056518
+.long  2627509248,3890675943
+.long  370546176,1903231089
+.long  1397969664,3570663636
+.long  404232192,2880110763
+.long  4076007936,2290614408
+.long  572662272,2374828173
+.long  4278124032,1920073842
+.long  1145324544,3115909305
+.long  3486502656,4177002744
+.long  2998055424,2896953516
+.long  3284386560,909508662
+.long  3048584448,707395626
+.long  2054846976,1010565180
+.long  2442236160,4059103473
+.long  606348288,1077936192
+.long  134744064,3553820883
+.long  3907577856,3149594811
+.long  2829625344,1128464451
+.long  1616928768,353697813
+.long  4244438016,2913796269
+.long  1768515840,2004287607
+.long  1347440640,2155872384
+.long  2863311360,2189557890
+.long  3503345664,3974889708
+.long  2694881280,656867367
+.long  2105376000,3856990437
+.long  2711724288,2240086149
+.long  2307492096,892665909
+.long  1650614784,202113036
+.long  2543294208,1094778945
+.long  1414812672,4025417967
+.long  1532713728,2475884691
+.long  505290240,421068825
+.long  2509608192,555810849
+.long  3772833792,235798542
+.long  4294967040,1313734734
+.long  1684300800,1701118053
+.long  3537031680,3183280317
+.long  269488128,3099066552
+.long  3301229568,2408513679
+.long  0,3958046955
+.long  1212696576,3469607118
+.long  2745410304,808452144
+.long  4160222976,1600061535
+.long  1970631936,3318022341
+.long  3688618752,437911578
+.long  2324335104,3789619425
+.long  50529024,3402236106
+.long  3873891840,1195835463
+.long  3671775744,1027407933
+.long  151587072,16842753
+.long  1061109504,3604349142
+.long  3722304768,1448476758
+.long  2492765184,1296891981
+.long  2273806080,218955789
+.long  1549556736,1717960806
+.long  2206434048,3435921612
+.long  33686016,757923885
+.long  3452816640,303169554
+.long  1246382592,538968096
+.long  2425393152,2981167281
+.long  858993408,2576941209
+.long  1936945920,1280049228
+.long  1734829824,3267494082
+.long  4143379968,2122186878
+.long  4092850944,84213765
+.long  2644352256,3082223799
+.long  2139062016,825294897
+.long  3217014528,387383319
+.long  3806519808,3621191895
+.long  1381126656,1482162264
+.long  2610666240,1633747041
+.long  3638089728,454754331
+.long  640034304,471597084
+.long  3368601600,252641295
+.long  926365440,370540566
+.long  3334915584,404226072
+.long  993737472,572653602
+.long  2172748032,1145307204
+.long  2526451200,2998010034
+.long  1869573888,3048538293
+.long  1263225600,2442199185
+.long  320017152,134742024
+.long  3200171520,2829582504
+.long  1667457792,4244373756
+.long  774778368,1347420240
+.long  3924420864,3503292624
+.long  2038003968,2105344125
+.long  2812782336,2307457161
+.long  2358021120,2543255703
+.long  2678038272,1532690523
+.long  1852730880,2509570197
+.long  3166485504,4294902015
+.long  2391707136,3536978130
+.long  690563328,3301179588
+.long  4126536960,1212678216
+.long  4193908992,4160159991
+.long  3065427456,3688562907
+.long  791621376,50528259
+.long  4261281024,3671720154
+.long  3031741440,1061093439
+.long  1499027712,2492727444
+.long  2021160960,1549533276
+.long  2560137216,33685506
+.long  101058048,1246363722
+.long  1785358848,858980403
+.long  3890734848,1734803559
+.long  1179010560,4092788979
+.long  1903259904,2139029631
+.long  3132799488,3806462178
+.long  3570717696,2610626715
+.long  623191296,640024614
+.long  2880154368,926351415
+.long  1111638528,993722427
+.long  2290649088,2526412950
+.long  2728567296,1263206475
+.long  2374864128,3200123070
+.long  4210752000,774766638
+.long  1920102912,2037973113
+.long  117901056,2357985420
+.long  3115956480,1852702830
+.long  1431655680,2391670926
+.long  4177065984,4126474485
+.long  4008635904,3065381046
+.long  2896997376,4261216509
+.long  168430080,1499005017
+.long  909522432,2560098456
+.long  1229539584,1785331818
+.long  707406336,1178992710
+.long  1751672832,3132752058
+.long  1010580480,623181861
+.long  943208448,1111621698
+.long  4059164928,2728525986
+.long  2762253312,4210688250
+.long  1077952512,117899271
+.long  673720320,1431634005
+.long  3553874688,4008575214
+.long  2071689984,168427530
+.long  3149642496,1229520969
+.long  3385444608,1751646312
+.long  1128481536,943194168
+.long  3250700544,2762211492
+.long  353703168,673710120
+.long  3823362816,2071658619
+.long  2913840384,3385393353
+.long  4109693952,3250651329
+.long  2004317952,3823304931
+.long  3351758592,4109631732
+.long  2155905024,3351707847
+.long  2661195264,2661154974
+.long  14737632,939538488
+.long  328965,1090535745
+.long  5789784,369104406
+.long  14277081,1979741814
+.long  6776679,3640711641
+.long  5131854,2466288531
+.long  8487297,1610637408
+.long  13355979,4060148466
+.long  13224393,1912631922
+.long  723723,3254829762
+.long  11447982,2868947883
+.long  6974058,2583730842
+.long  14013909,1962964341
+.long  1579032,100664838
+.long  6118749,1459640151
+.long  8553090,2684395680
+.long  4605510,2432733585
+.long  14671839,4144035831
+.long  14079702,3036722613
+.long  2565927,3372272073
+.long  9079434,2717950626
+.long  3289650,2348846220
+.long  4934475,3523269330
+.long  4342338,2415956112
+.long  14408667,4127258358
+.long  1842204,117442311
+.long  10395294,2801837991
+.long  10263708,654321447
+.long  3815994,2382401166
+.long  13290186,2986390194
+.long  2434341,1224755529
+.long  8092539,3724599006
+.long  855309,1124090691
+.long  7434609,1543527516
+.long  6250335,3607156695
+.long  2039583,3338717127
+.long  16316664,1040203326
+.long  14145495,4110480885
+.long  4079166,2399178639
+.long  10329501,1728079719
+.long  8158332,520101663
+.long  6316128,402659352
+.long  12171705,1845522030
+.long  12500670,2936057775
+.long  12369084,788541231
+.long  9145227,3791708898
+.long  1447446,2231403909
+.long  3421236,218107149
+.long  5066061,1392530259
+.long  12829635,4026593520
+.long  7500402,2617285788
+.long  9803157,1694524773
+.long  11250603,3925928682
+.long  9342606,2734728099
+.long  12237498,2919280302
+.long  8026746,2650840734
+.long  11776947,3959483628
+.long  131586,2147516544
+.long  11842740,754986285
+.long  11382189,1795189611
+.long  10658466,2818615464
+.long  11316396,721431339
+.long  14211288,905983542
+.long  10132122,2785060518
+.long  1513239,3305162181
+.long  1710618,2248181382
+.long  3487029,1291865421
+.long  13421772,855651123
+.long  16250871,4244700669
+.long  10066329,1711302246
+.long  6381921,1476417624
+.long  5921370,2516620950
+.long  15263976,973093434
+.long  2368548,150997257
+.long  5658198,2499843477
+.long  4210752,268439568
+.long  14803425,2013296760
+.long  6513507,3623934168
+.long  592137,1107313218
+.long  3355443,3422604492
+.long  12566463,4009816047
+.long  10000536,637543974
+.long  9934743,3842041317
+.long  8750469,1627414881
+.long  6842472,436214298
+.long  16579836,1056980799
+.long  15527148,989870907
+.long  657930,2181071490
+.long  14342874,3053500086
+.long  7303023,3674266587
+.long  5460819,3556824276
+.long  6447714,2550175896
+.long  10724259,3892373736
+.long  3026478,2332068747
+.long  526344,33554946
+.long  11513775,3942706155
+.long  2631720,167774730
+.long  11579568,738208812
+.long  7631988,486546717
+.long  12763842,2952835248
+.long  12434877,1862299503
+.long  3552822,2365623693
+.long  2236962,2281736328
+.long  3684408,234884622
+.long  6579300,419436825
+.long  1973790,2264958855
+.long  3750201,1308642894
+.long  2894892,184552203
+.long  10921638,2835392937
+.long  3158064,201329676
+.long  15066597,2030074233
+.long  4473924,285217041
+.long  16645629,2130739071
+.long  8947848,570434082
+.long  10461087,3875596263
+.long  6645093,1493195097
+.long  8882055,3774931425
+.long  7039851,3657489114
+.long  16053492,1023425853
+.long  2302755,3355494600
+.long  4737096,301994514
+.long  1052688,67109892
+.long  13750737,1946186868
+.long  5329233,1409307732
+.long  12632256,805318704
+.long  16382457,2113961598
+.long  13816530,3019945140
+.long  10526880,671098920
+.long  5592405,1426085205
+.long  10592673,1744857192
+.long  4276545,1342197840
+.long  16448250,3187719870
+.long  4408131,3489714384
+.long  1250067,3288384708
+.long  12895428,822096177
+.long  3092271,3405827019
+.long  11053224,704653866
+.long  11974326,2902502829
+.long  3947580,251662095
+.long  2829099,3389049546
+.long  12698049,1879076976
+.long  16777215,4278255615
+.long  13158600,838873650
+.long  10855845,1761634665
+.long  2105376,134219784
+.long  9013641,1644192354
+.long  0,0
+.long  9474192,603989028
+.long  4671303,3506491857
+.long  15724527,4211145723
+.long  15395562,3120609978
+.long  12040119,3976261101
+.long  1381653,1157645637
+.long  394758,2164294017
+.long  13487565,1929409395
+.long  11908533,1828744557
+.long  1184274,2214626436
+.long  8289918,2667618207
+.long  12303291,3993038574
+.long  2697513,1241533002
+.long  986895,3271607235
+.long  12105912,771763758
+.long  460551,3238052289
+.long  263172,16777473
+.long  10197915,3858818790
+.long  9737364,620766501
+.long  2171169,1207978056
+.long  6710886,2566953369
+.long  15132390,3103832505
+.long  13553358,3003167667
+.long  15592941,2063629179
+.long  15198183,4177590777
+.long  3881787,3456159438
+.long  16711422,3204497343
+.long  8355711,3741376479
+.long  12961221,1895854449
+.long  10790052,687876393
+.long  3618615,3439381965
+.long  11645361,1811967084
+.long  5000268,318771987
+.long  9539985,1677747300
+.long  7237230,2600508315
+.long  9276813,1660969827
+.long  7763574,2634063261
+.long  197379,3221274816
+.long  2960685,1258310475
+.long  14606046,3070277559
+.long  9868950,2768283045
+.long  2500134,2298513801
+.long  8224125,1593859935
+.long  13027014,2969612721
+.long  6052956,385881879
+.long  13882323,4093703412
+.long  15921906,3154164924
+.long  5197647,3540046803
+.long  1644825,1174423110
+.long  4144959,3472936911
+.long  14474460,922761015
+.long  7960953,1577082462
+.long  1907997,1191200583
+.long  5395026,2483066004
+.long  15461355,4194368250
+.long  15987699,4227923196
+.long  7171437,1526750043
+.long  6184542,2533398423
+.long  16514043,4261478142
+.long  6908265,1509972570
+.long  11711154,2885725356
+.long  15790320,1006648380
+.long  3223857,1275087948
+.long  789516,50332419
+.long  13948116,889206069
+.long  13619151,4076925939
+.long  9211020,587211555
+.long  14869218,3087055032
+.long  7697781,1560304989
+.long  11119017,1778412138
+.long  4868682,2449511058
+.long  5723991,3573601749
+.long  8684676,553656609
+.long  1118481,1140868164
+.long  4539717,1358975313
+.long  1776411,3321939654
+.long  16119285,2097184125
+.long  15000804,956315961
+.long  921102,2197848963
+.long  7566195,3691044060
+.long  11184810,2852170410
+.long  15856113,2080406652
+.long  14540253,1996519287
+.long  5855577,1442862678
+.long  1315860,83887365
+.long  7105644,452991771
+.long  9605778,2751505572
+.long  5526612,352326933
+.long  13684944,872428596
+.long  7895160,503324190
+.long  7368816,469769244
+.long  14935011,4160813304
+.long  4802889,1375752786
+.long  8421504,536879136
+.long  5263440,335549460
+.long  10987431,3909151209
+.long  16185078,3170942397
+.long  7829367,3707821533
+.long  9671571,3825263844
+.long  8816262,2701173153
+.long  8618883,3758153952
+.long  2763306,2315291274
+.long  13092807,4043370993
+.long  5987163,3590379222
+.long  15329769,2046851706
+.long  15658734,3137387451
+.long  9408399,3808486371
+.long  65793,1073758272
+.long  4013373,1325420367
+.globl Camellia_cbc_encrypt
+.type  Camellia_cbc_encrypt,@function
+.align 16
+Camellia_cbc_encrypt:
+.L_Camellia_cbc_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    28(%esp),%ecx
+       cmpl    $0,%ecx
+       je      .L016enc_out
+       pushfl
+       cld
+       movl    24(%esp),%eax
+       movl    28(%esp),%ebx
+       movl    36(%esp),%edx
+       movl    40(%esp),%ebp
+       leal    -64(%esp),%esi
+       andl    $-64,%esi
+       leal    -127(%edx),%edi
+       subl    %esi,%edi
+       negl    %edi
+       andl    $960,%edi
+       subl    %edi,%esi
+       movl    44(%esp),%edi
+       xchgl   %esi,%esp
+       addl    $4,%esp
+       movl    %esi,20(%esp)
+       movl    %eax,24(%esp)
+       movl    %ebx,28(%esp)
+       movl    %ecx,32(%esp)
+       movl    %edx,36(%esp)
+       movl    %ebp,40(%esp)
+       call    .L017pic_point
+.L017pic_point:
+       popl    %ebp
+       leal    .LCamellia_SBOX-.L017pic_point(%ebp),%ebp
+       movl    $32,%esi
+.align 4
+.L018prefetch_sbox:
+       movl    (%ebp),%eax
+       movl    32(%ebp),%ebx
+       movl    64(%ebp),%ecx
+       movl    96(%ebp),%edx
+       leal    128(%ebp),%ebp
+       decl    %esi
+       jnz     .L018prefetch_sbox
+       movl    36(%esp),%eax
+       subl    $4096,%ebp
+       movl    24(%esp),%esi
+       movl    272(%eax),%edx
+       cmpl    $0,%edi
+       je      .L019DECRYPT
+       movl    32(%esp),%ecx
+       movl    40(%esp),%edi
+       shll    $6,%edx
+       leal    (%eax,%edx,1),%edx
+       movl    %edx,16(%esp)
+       testl   $4294967280,%ecx
+       jz      .L020enc_tail
+       movl    (%edi),%eax
+       movl    4(%edi),%ebx
+.align 4
+.L021enc_loop:
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       xorl    (%esi),%eax
+       xorl    4(%esi),%ebx
+       xorl    8(%esi),%ecx
+       bswap   %eax
+       xorl    12(%esi),%edx
+       bswap   %ebx
+       movl    36(%esp),%edi
+       bswap   %ecx
+       bswap   %edx
+       call    _x86_Camellia_encrypt
+       movl    24(%esp),%esi
+       movl    28(%esp),%edi
+       bswap   %eax
+       bswap   %ebx
+       bswap   %ecx
+       movl    %eax,(%edi)
+       bswap   %edx
+       movl    %ebx,4(%edi)
+       movl    %ecx,8(%edi)
+       movl    %edx,12(%edi)
+       movl    32(%esp),%ecx
+       leal    16(%esi),%esi
+       movl    %esi,24(%esp)
+       leal    16(%edi),%edx
+       movl    %edx,28(%esp)
+       subl    $16,%ecx
+       testl   $4294967280,%ecx
+       movl    %ecx,32(%esp)
+       jnz     .L021enc_loop
+       testl   $15,%ecx
+       jnz     .L020enc_tail
+       movl    40(%esp),%esi
+       movl    8(%edi),%ecx
+       movl    12(%edi),%edx
+       movl    %eax,(%esi)
+       movl    %ebx,4(%esi)
+       movl    %ecx,8(%esi)
+       movl    %edx,12(%esi)
+       movl    20(%esp),%esp
+       popfl
+.L016enc_out:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+       pushfl
+.align 4
+.L020enc_tail:
+       movl    %edi,%eax
+       movl    28(%esp),%edi
+       pushl   %eax