Update files for OpenSSL-1.0.1 update.
author    Peter Avalos <pavalos@dragonflybsd.org>
          Wed, 28 Mar 2012 00:04:42 +0000 (17:04 -0700)
committer Peter Avalos <pavalos@dragonflybsd.org>
          Thu, 29 Mar 2012 22:53:09 +0000 (15:53 -0700)
This commit changes the order of sources in the Makefiles to match the
ordering used in the OpenSSL distribution, which should make it easier
to update the build system for future imports.
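
The reordering can be sanity-checked against the vendor tree.  A minimal
sketch (not part of this commit, assuming the vendor Makefiles' LIBSRC
variable and the crypto/openssl layout shown in the file list below):
print one subdirectory's source list from the vendor Makefile, keeping
backslash continuations, and compare it with the matching SRCS+= block
in secure/lib/libcrypto/Makefile.

    # Print the vendor's source list for crypto/sha, continuation lines
    # included, so the local SRCS+= ordering can be checked against it.
    awk '/^LIBSRC=/ { p = 1 } p { print; if (!/\\$/) p = 0 }' \
        crypto/openssl/crypto/sha/Makefile

Substituting another subdirectory (bn, evp, asn1, ...) checks the rest
of the SRCS blocks the same way.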

370 files changed:
Makefile_upgrade.inc
crypto/openssl/crypto/bn/asm/x86_64-mont5.pl [changed mode: 0755->0644]
crypto/openssl/ssl/srtp.h
secure/lib/libcrypto/Makefile
secure/lib/libcrypto/Makefile.inc
secure/lib/libcrypto/asm/Makefile
secure/lib/libcrypto/asm/aes-586.s
secure/lib/libcrypto/asm/aes-x86_64.s
secure/lib/libcrypto/asm/aesni-sha1-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/aesni-x86.s [new file with mode: 0644]
secure/lib/libcrypto/asm/aesni-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/bsaes-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/cmll-x86.s
secure/lib/libcrypto/asm/ghash-x86.s [new file with mode: 0644]
secure/lib/libcrypto/asm/ghash-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/modexp512-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/rc4-586.s
secure/lib/libcrypto/asm/rc4-md5-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/rc4-x86_64.s
secure/lib/libcrypto/asm/sha1-586.s
secure/lib/libcrypto/asm/sha1-x86_64.s
secure/lib/libcrypto/asm/sha256-586.s
secure/lib/libcrypto/asm/sha256-x86_64.s
secure/lib/libcrypto/asm/sha512-x86_64.s
secure/lib/libcrypto/asm/vpaes-x86.s [new file with mode: 0644]
secure/lib/libcrypto/asm/vpaes-x86_64.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86-gf2m.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86_64-gf2m.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86_64-mont.s
secure/lib/libcrypto/asm/x86_64-mont5.s [new file with mode: 0644]
secure/lib/libcrypto/asm/x86_64cpuid.s
secure/lib/libcrypto/asm/x86cpuid.s
secure/lib/libcrypto/man/ASN1_OBJECT_new.3
secure/lib/libcrypto/man/ASN1_STRING_length.3
secure/lib/libcrypto/man/ASN1_STRING_new.3
secure/lib/libcrypto/man/ASN1_STRING_print_ex.3
secure/lib/libcrypto/man/ASN1_generate_nconf.3
secure/lib/libcrypto/man/BIO_ctrl.3
secure/lib/libcrypto/man/BIO_f_base64.3
secure/lib/libcrypto/man/BIO_f_buffer.3
secure/lib/libcrypto/man/BIO_f_cipher.3
secure/lib/libcrypto/man/BIO_f_md.3
secure/lib/libcrypto/man/BIO_f_null.3
secure/lib/libcrypto/man/BIO_f_ssl.3
secure/lib/libcrypto/man/BIO_find_type.3
secure/lib/libcrypto/man/BIO_new.3
secure/lib/libcrypto/man/BIO_new_CMS.3
secure/lib/libcrypto/man/BIO_push.3
secure/lib/libcrypto/man/BIO_read.3
secure/lib/libcrypto/man/BIO_s_accept.3
secure/lib/libcrypto/man/BIO_s_bio.3
secure/lib/libcrypto/man/BIO_s_connect.3
secure/lib/libcrypto/man/BIO_s_fd.3
secure/lib/libcrypto/man/BIO_s_file.3
secure/lib/libcrypto/man/BIO_s_mem.3
secure/lib/libcrypto/man/BIO_s_null.3
secure/lib/libcrypto/man/BIO_s_socket.3
secure/lib/libcrypto/man/BIO_set_callback.3
secure/lib/libcrypto/man/BIO_should_retry.3
secure/lib/libcrypto/man/BN_BLINDING_new.3
secure/lib/libcrypto/man/BN_CTX_new.3
secure/lib/libcrypto/man/BN_CTX_start.3
secure/lib/libcrypto/man/BN_add.3
secure/lib/libcrypto/man/BN_add_word.3
secure/lib/libcrypto/man/BN_bn2bin.3
secure/lib/libcrypto/man/BN_cmp.3
secure/lib/libcrypto/man/BN_copy.3
secure/lib/libcrypto/man/BN_generate_prime.3
secure/lib/libcrypto/man/BN_mod_inverse.3
secure/lib/libcrypto/man/BN_mod_mul_montgomery.3
secure/lib/libcrypto/man/BN_mod_mul_reciprocal.3
secure/lib/libcrypto/man/BN_new.3
secure/lib/libcrypto/man/BN_num_bytes.3
secure/lib/libcrypto/man/BN_rand.3
secure/lib/libcrypto/man/BN_set_bit.3
secure/lib/libcrypto/man/BN_swap.3
secure/lib/libcrypto/man/BN_zero.3
secure/lib/libcrypto/man/CMS_add0_cert.3
secure/lib/libcrypto/man/CMS_add1_recipient_cert.3
secure/lib/libcrypto/man/CMS_compress.3
secure/lib/libcrypto/man/CMS_decrypt.3
secure/lib/libcrypto/man/CMS_encrypt.3
secure/lib/libcrypto/man/CMS_final.3
secure/lib/libcrypto/man/CMS_get0_RecipientInfos.3
secure/lib/libcrypto/man/CMS_get0_SignerInfos.3
secure/lib/libcrypto/man/CMS_get0_type.3
secure/lib/libcrypto/man/CMS_get1_ReceiptRequest.3
secure/lib/libcrypto/man/CMS_sign.3
secure/lib/libcrypto/man/CMS_sign_add1_signer.3
secure/lib/libcrypto/man/CMS_sign_receipt.3
secure/lib/libcrypto/man/CMS_uncompress.3
secure/lib/libcrypto/man/CMS_verify.3
secure/lib/libcrypto/man/CMS_verify_receipt.3
secure/lib/libcrypto/man/CONF_modules_free.3
secure/lib/libcrypto/man/CONF_modules_load_file.3
secure/lib/libcrypto/man/CRYPTO_set_ex_data.3
secure/lib/libcrypto/man/DH_generate_key.3
secure/lib/libcrypto/man/DH_generate_parameters.3
secure/lib/libcrypto/man/DH_get_ex_new_index.3
secure/lib/libcrypto/man/DH_new.3
secure/lib/libcrypto/man/DH_set_method.3
secure/lib/libcrypto/man/DH_size.3
secure/lib/libcrypto/man/DSA_SIG_new.3
secure/lib/libcrypto/man/DSA_do_sign.3
secure/lib/libcrypto/man/DSA_dup_DH.3
secure/lib/libcrypto/man/DSA_generate_key.3
secure/lib/libcrypto/man/DSA_generate_parameters.3
secure/lib/libcrypto/man/DSA_get_ex_new_index.3
secure/lib/libcrypto/man/DSA_new.3
secure/lib/libcrypto/man/DSA_set_method.3
secure/lib/libcrypto/man/DSA_sign.3
secure/lib/libcrypto/man/DSA_size.3
secure/lib/libcrypto/man/ERR_GET_LIB.3
secure/lib/libcrypto/man/ERR_clear_error.3
secure/lib/libcrypto/man/ERR_error_string.3
secure/lib/libcrypto/man/ERR_get_error.3
secure/lib/libcrypto/man/ERR_load_crypto_strings.3
secure/lib/libcrypto/man/ERR_load_strings.3
secure/lib/libcrypto/man/ERR_print_errors.3
secure/lib/libcrypto/man/ERR_put_error.3
secure/lib/libcrypto/man/ERR_remove_state.3
secure/lib/libcrypto/man/ERR_set_mark.3
secure/lib/libcrypto/man/EVP_BytesToKey.3
secure/lib/libcrypto/man/EVP_DigestInit.3
secure/lib/libcrypto/man/EVP_DigestSignInit.3
secure/lib/libcrypto/man/EVP_DigestVerifyInit.3
secure/lib/libcrypto/man/EVP_EncryptInit.3
secure/lib/libcrypto/man/EVP_OpenInit.3
secure/lib/libcrypto/man/EVP_PKEY_CTX_ctrl.3
secure/lib/libcrypto/man/EVP_PKEY_CTX_new.3
secure/lib/libcrypto/man/EVP_PKEY_cmp.3
secure/lib/libcrypto/man/EVP_PKEY_decrypt.3
secure/lib/libcrypto/man/EVP_PKEY_derive.3
secure/lib/libcrypto/man/EVP_PKEY_encrypt.3
secure/lib/libcrypto/man/EVP_PKEY_get_default_digest.3
secure/lib/libcrypto/man/EVP_PKEY_keygen.3
secure/lib/libcrypto/man/EVP_PKEY_new.3
secure/lib/libcrypto/man/EVP_PKEY_print_private.3
secure/lib/libcrypto/man/EVP_PKEY_set1_RSA.3
secure/lib/libcrypto/man/EVP_PKEY_sign.3
secure/lib/libcrypto/man/EVP_PKEY_verify.3
secure/lib/libcrypto/man/EVP_PKEY_verifyrecover.3
secure/lib/libcrypto/man/EVP_SealInit.3
secure/lib/libcrypto/man/EVP_SignInit.3
secure/lib/libcrypto/man/EVP_VerifyInit.3
secure/lib/libcrypto/man/OBJ_nid2obj.3
secure/lib/libcrypto/man/OPENSSL_Applink.3
secure/lib/libcrypto/man/OPENSSL_VERSION_NUMBER.3
secure/lib/libcrypto/man/OPENSSL_config.3
secure/lib/libcrypto/man/OPENSSL_ia32cap.3
secure/lib/libcrypto/man/OPENSSL_load_builtin_modules.3
secure/lib/libcrypto/man/OpenSSL_add_all_algorithms.3
secure/lib/libcrypto/man/PEM_write_bio_CMS_stream.3
secure/lib/libcrypto/man/PEM_write_bio_PKCS7_stream.3
secure/lib/libcrypto/man/PKCS12_create.3
secure/lib/libcrypto/man/PKCS12_parse.3
secure/lib/libcrypto/man/PKCS7_decrypt.3
secure/lib/libcrypto/man/PKCS7_encrypt.3
secure/lib/libcrypto/man/PKCS7_sign.3
secure/lib/libcrypto/man/PKCS7_sign_add_signer.3
secure/lib/libcrypto/man/PKCS7_verify.3
secure/lib/libcrypto/man/RAND_add.3
secure/lib/libcrypto/man/RAND_bytes.3
secure/lib/libcrypto/man/RAND_cleanup.3
secure/lib/libcrypto/man/RAND_egd.3
secure/lib/libcrypto/man/RAND_load_file.3
secure/lib/libcrypto/man/RAND_set_rand_method.3
secure/lib/libcrypto/man/RSA_blinding_on.3
secure/lib/libcrypto/man/RSA_check_key.3
secure/lib/libcrypto/man/RSA_generate_key.3
secure/lib/libcrypto/man/RSA_get_ex_new_index.3
secure/lib/libcrypto/man/RSA_new.3
secure/lib/libcrypto/man/RSA_padding_add_PKCS1_type_1.3
secure/lib/libcrypto/man/RSA_print.3
secure/lib/libcrypto/man/RSA_private_encrypt.3
secure/lib/libcrypto/man/RSA_public_encrypt.3
secure/lib/libcrypto/man/RSA_set_method.3
secure/lib/libcrypto/man/RSA_sign.3
secure/lib/libcrypto/man/RSA_sign_ASN1_OCTET_STRING.3
secure/lib/libcrypto/man/RSA_size.3
secure/lib/libcrypto/man/SMIME_read_CMS.3
secure/lib/libcrypto/man/SMIME_read_PKCS7.3
secure/lib/libcrypto/man/SMIME_write_CMS.3
secure/lib/libcrypto/man/SMIME_write_PKCS7.3
secure/lib/libcrypto/man/X509_NAME_ENTRY_get_object.3
secure/lib/libcrypto/man/X509_NAME_add_entry_by_txt.3
secure/lib/libcrypto/man/X509_NAME_get_index_by_NID.3
secure/lib/libcrypto/man/X509_NAME_print_ex.3
secure/lib/libcrypto/man/X509_STORE_CTX_get_error.3
secure/lib/libcrypto/man/X509_STORE_CTX_get_ex_new_index.3
secure/lib/libcrypto/man/X509_STORE_CTX_new.3
secure/lib/libcrypto/man/X509_STORE_CTX_set_verify_cb.3
secure/lib/libcrypto/man/X509_STORE_set_verify_cb_func.3
secure/lib/libcrypto/man/X509_VERIFY_PARAM_set_flags.3
secure/lib/libcrypto/man/X509_new.3
secure/lib/libcrypto/man/X509_verify_cert.3
secure/lib/libcrypto/man/bio.3
secure/lib/libcrypto/man/blowfish.3
secure/lib/libcrypto/man/bn.3
secure/lib/libcrypto/man/bn_internal.3
secure/lib/libcrypto/man/buffer.3
secure/lib/libcrypto/man/crypto.3
secure/lib/libcrypto/man/d2i_ASN1_OBJECT.3
secure/lib/libcrypto/man/d2i_DHparams.3
secure/lib/libcrypto/man/d2i_DSAPublicKey.3
secure/lib/libcrypto/man/d2i_PKCS8PrivateKey.3
secure/lib/libcrypto/man/d2i_RSAPublicKey.3
secure/lib/libcrypto/man/d2i_X509.3
secure/lib/libcrypto/man/d2i_X509_ALGOR.3
secure/lib/libcrypto/man/d2i_X509_CRL.3
secure/lib/libcrypto/man/d2i_X509_NAME.3
secure/lib/libcrypto/man/d2i_X509_REQ.3
secure/lib/libcrypto/man/d2i_X509_SIG.3
secure/lib/libcrypto/man/des.3
secure/lib/libcrypto/man/des_modes.7
secure/lib/libcrypto/man/dh.3
secure/lib/libcrypto/man/dsa.3
secure/lib/libcrypto/man/ecdsa.3
secure/lib/libcrypto/man/engine.3
secure/lib/libcrypto/man/err.3
secure/lib/libcrypto/man/evp.3
secure/lib/libcrypto/man/hmac.3
secure/lib/libcrypto/man/i2d_CMS_bio_stream.3
secure/lib/libcrypto/man/i2d_PKCS7_bio_stream.3
secure/lib/libcrypto/man/lh_stats.3
secure/lib/libcrypto/man/lhash.3
secure/lib/libcrypto/man/md5.3
secure/lib/libcrypto/man/mdc2.3
secure/lib/libcrypto/man/pem.3
secure/lib/libcrypto/man/rand.3
secure/lib/libcrypto/man/rc4.3
secure/lib/libcrypto/man/ripemd.3
secure/lib/libcrypto/man/rsa.3
secure/lib/libcrypto/man/sha.3
secure/lib/libcrypto/man/threads.3
secure/lib/libcrypto/man/ui.3
secure/lib/libcrypto/man/ui_compat.3
secure/lib/libcrypto/man/x509.3
secure/lib/libcrypto/opensslconf-i386.h
secure/lib/libcrypto/opensslconf-x86_64.h
secure/lib/libssl/Makefile
secure/lib/libssl/man/SSL_CIPHER_get_name.3
secure/lib/libssl/man/SSL_COMP_add_compression_method.3
secure/lib/libssl/man/SSL_CTX_add_extra_chain_cert.3
secure/lib/libssl/man/SSL_CTX_add_session.3
secure/lib/libssl/man/SSL_CTX_ctrl.3
secure/lib/libssl/man/SSL_CTX_flush_sessions.3
secure/lib/libssl/man/SSL_CTX_free.3
secure/lib/libssl/man/SSL_CTX_get_ex_new_index.3
secure/lib/libssl/man/SSL_CTX_get_verify_mode.3
secure/lib/libssl/man/SSL_CTX_load_verify_locations.3
secure/lib/libssl/man/SSL_CTX_new.3
secure/lib/libssl/man/SSL_CTX_sess_number.3
secure/lib/libssl/man/SSL_CTX_sess_set_cache_size.3
secure/lib/libssl/man/SSL_CTX_sess_set_get_cb.3
secure/lib/libssl/man/SSL_CTX_sessions.3
secure/lib/libssl/man/SSL_CTX_set_cert_store.3
secure/lib/libssl/man/SSL_CTX_set_cert_verify_callback.3
secure/lib/libssl/man/SSL_CTX_set_cipher_list.3
secure/lib/libssl/man/SSL_CTX_set_client_CA_list.3
secure/lib/libssl/man/SSL_CTX_set_client_cert_cb.3
secure/lib/libssl/man/SSL_CTX_set_default_passwd_cb.3
secure/lib/libssl/man/SSL_CTX_set_generate_session_id.3
secure/lib/libssl/man/SSL_CTX_set_info_callback.3
secure/lib/libssl/man/SSL_CTX_set_max_cert_list.3
secure/lib/libssl/man/SSL_CTX_set_mode.3
secure/lib/libssl/man/SSL_CTX_set_msg_callback.3
secure/lib/libssl/man/SSL_CTX_set_options.3
secure/lib/libssl/man/SSL_CTX_set_psk_client_callback.3
secure/lib/libssl/man/SSL_CTX_set_quiet_shutdown.3
secure/lib/libssl/man/SSL_CTX_set_session_cache_mode.3
secure/lib/libssl/man/SSL_CTX_set_session_id_context.3
secure/lib/libssl/man/SSL_CTX_set_ssl_version.3
secure/lib/libssl/man/SSL_CTX_set_timeout.3
secure/lib/libssl/man/SSL_CTX_set_tmp_dh_callback.3
secure/lib/libssl/man/SSL_CTX_set_tmp_rsa_callback.3
secure/lib/libssl/man/SSL_CTX_set_verify.3
secure/lib/libssl/man/SSL_CTX_use_certificate.3
secure/lib/libssl/man/SSL_CTX_use_psk_identity_hint.3
secure/lib/libssl/man/SSL_SESSION_free.3
secure/lib/libssl/man/SSL_SESSION_get_ex_new_index.3
secure/lib/libssl/man/SSL_SESSION_get_time.3
secure/lib/libssl/man/SSL_accept.3
secure/lib/libssl/man/SSL_alert_type_string.3
secure/lib/libssl/man/SSL_clear.3
secure/lib/libssl/man/SSL_connect.3
secure/lib/libssl/man/SSL_do_handshake.3
secure/lib/libssl/man/SSL_free.3
secure/lib/libssl/man/SSL_get_SSL_CTX.3
secure/lib/libssl/man/SSL_get_ciphers.3
secure/lib/libssl/man/SSL_get_client_CA_list.3
secure/lib/libssl/man/SSL_get_current_cipher.3
secure/lib/libssl/man/SSL_get_default_timeout.3
secure/lib/libssl/man/SSL_get_error.3
secure/lib/libssl/man/SSL_get_ex_data_X509_STORE_CTX_idx.3
secure/lib/libssl/man/SSL_get_ex_new_index.3
secure/lib/libssl/man/SSL_get_fd.3
secure/lib/libssl/man/SSL_get_peer_cert_chain.3
secure/lib/libssl/man/SSL_get_peer_certificate.3
secure/lib/libssl/man/SSL_get_psk_identity.3
secure/lib/libssl/man/SSL_get_rbio.3
secure/lib/libssl/man/SSL_get_session.3
secure/lib/libssl/man/SSL_get_verify_result.3
secure/lib/libssl/man/SSL_get_version.3
secure/lib/libssl/man/SSL_library_init.3
secure/lib/libssl/man/SSL_load_client_CA_file.3
secure/lib/libssl/man/SSL_new.3
secure/lib/libssl/man/SSL_pending.3
secure/lib/libssl/man/SSL_read.3
secure/lib/libssl/man/SSL_rstate_string.3
secure/lib/libssl/man/SSL_session_reused.3
secure/lib/libssl/man/SSL_set_bio.3
secure/lib/libssl/man/SSL_set_connect_state.3
secure/lib/libssl/man/SSL_set_fd.3
secure/lib/libssl/man/SSL_set_session.3
secure/lib/libssl/man/SSL_set_shutdown.3
secure/lib/libssl/man/SSL_set_verify_result.3
secure/lib/libssl/man/SSL_shutdown.3
secure/lib/libssl/man/SSL_state_string.3
secure/lib/libssl/man/SSL_want.3
secure/lib/libssl/man/SSL_write.3
secure/lib/libssl/man/d2i_SSL_SESSION.3
secure/lib/libssl/man/ssl.3
secure/usr.bin/openssl/Makefile
secure/usr.bin/openssl/man/CA.pl.1
secure/usr.bin/openssl/man/asn1parse.1
secure/usr.bin/openssl/man/ca.1
secure/usr.bin/openssl/man/ciphers.1
secure/usr.bin/openssl/man/cms.1
secure/usr.bin/openssl/man/config.5
secure/usr.bin/openssl/man/crl.1
secure/usr.bin/openssl/man/crl2pkcs7.1
secure/usr.bin/openssl/man/dgst.1
secure/usr.bin/openssl/man/dhparam.1
secure/usr.bin/openssl/man/dsa.1
secure/usr.bin/openssl/man/dsaparam.1
secure/usr.bin/openssl/man/ec.1
secure/usr.bin/openssl/man/ecparam.1
secure/usr.bin/openssl/man/enc.1
secure/usr.bin/openssl/man/errstr.1
secure/usr.bin/openssl/man/gendsa.1
secure/usr.bin/openssl/man/genpkey.1
secure/usr.bin/openssl/man/genrsa.1
secure/usr.bin/openssl/man/nseq.1
secure/usr.bin/openssl/man/ocsp.1
secure/usr.bin/openssl/man/openssl.1
secure/usr.bin/openssl/man/passwd.1
secure/usr.bin/openssl/man/pkcs12.1
secure/usr.bin/openssl/man/pkcs7.1
secure/usr.bin/openssl/man/pkcs8.1
secure/usr.bin/openssl/man/pkey.1
secure/usr.bin/openssl/man/pkeyparam.1
secure/usr.bin/openssl/man/pkeyutl.1
secure/usr.bin/openssl/man/rand.1
secure/usr.bin/openssl/man/req.1
secure/usr.bin/openssl/man/rsa.1
secure/usr.bin/openssl/man/rsautl.1
secure/usr.bin/openssl/man/s_client.1
secure/usr.bin/openssl/man/s_server.1
secure/usr.bin/openssl/man/s_time.1
secure/usr.bin/openssl/man/sess_id.1
secure/usr.bin/openssl/man/smime.1
secure/usr.bin/openssl/man/speed.1
secure/usr.bin/openssl/man/spkac.1
secure/usr.bin/openssl/man/ts.1
secure/usr.bin/openssl/man/tsget.1
secure/usr.bin/openssl/man/verify.1
secure/usr.bin/openssl/man/version.1
secure/usr.bin/openssl/man/x509.1
secure/usr.bin/openssl/man/x509v3_config.5

index 6668334..55ea141 100644 (file)
@@ -1984,6 +1984,9 @@ TO_REMOVE+=/usr/lib/profile/libtinfo.aa
 TO_REMOVE+=/usr/bin/kzip
 TO_REMOVE+=/usr/share/man/cat8/kzip.8.gz
 TO_REMOVE+=/usr/share/man/man8/kzip.8.gz
+TO_REMOVE+=/usr/include/openssl/e_os.h
+TO_REMOVE+=/usr/include/openssl/eng_int.h
+TO_REMOVE+=/usr/include/openssl/ui_locl.h
 
 .if ${MACHINE_ARCH} == "x86_64"
 TO_REMOVE+=/usr/libdata/stallion/2681.sys
old mode 100755 (executable)
new mode 100644 (file)
index c0cf33e..ae364d0 100644 (file)
@@ -132,7 +132,6 @@ extern "C" {
 
 int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles);
 int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles);
-SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
 
 STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl);
 SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
index 0b6980f..bc8d47a 100644 (file)
@@ -20,44 +20,63 @@ LDFLAGS+=-Wl,-Bsymbolic
 .include "Makefile.inc"
 
 # base sources
-SRCS=  cpt_err.c cryptlib.c cversion.c ebcdic.c ex_data.c \
-       mem.c mem_dbg.c o_dir.c o_time.c uid.c
+SRCS=  cryptlib.c mem.c mem_dbg.c cversion.c ex_data.c cpt_err.c ebcdic.c \
+       uid.c o_time.c o_dir.c o_fips.c o_init.c fips_ers.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= x86cpuid.s
 .elif ${MACHINE_ARCH} == "x86_64"
 SRCS+= x86_64cpuid.s
 .endif
-INCS=  ../e_os.h ../e_os2.h crypto.h ebcdic.h opensslv.h ossl_typ.h symhacks.h
+INCS=  ../e_os2.h
+INCS+= crypto.h opensslv.h opensslconf.h ebcdic.h symhacks.h ossl_typ.h
+SRCS+= buildinf.h
+INCSDIR=       ${INCLUDEDIR}/openssl
+
+CLEANFILES+=   buildinf.h opensslconf.h
+
+buildinf.h:
+       ( echo "#ifndef MK1MF_BUILD"; \
+       echo "  /* auto-generated by crypto/Makefile.ssl for crypto/cversion.c */"; \
+       echo "  #define CFLAGS \"$(CC)\""; \
+       echo "  #define PLATFORM \"`uname -s`-`uname -m`\""; \
+       echo "  #define DATE \"`LC_ALL=C date`\""; \
+       echo "#endif" ) > ${.TARGET}
+
+opensslconf.h: opensslconf-${MACHINE_ARCH}.h
+.if defined(WANT_IDEA)
+       sed '/^# define OPENSSL_NO_IDEA$$/d;/^#  define NO_IDEA$$/d' ${.ALLSRC} > ${.TARGET}
+.else
+       cp ${.ALLSRC} ${.TARGET}
+.endif
+
 
 # aes
-SRCS+= aes_cfb.c aes_ctr.c aes_ecb.c aes_ige.c \
-       aes_misc.c aes_ofb.c aes_wrap.c
+SRCS+= aes_misc.c aes_ecb.c aes_cfb.c aes_ofb.c aes_ctr.c aes_ige.c aes_wrap.c
 .if ${MACHINE_ARCH} == "i386"
-SRCS+= aes-586.s
+SRCS+= aes-586.s vpaes-x86.s aesni-x86.s
 .elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= aes-x86_64.s
+SRCS+= aes-x86_64.s vpaes-x86_64.s bsaes-x86_64.s aesni-x86_64.s aesni-sha1-x86_64.s
 .endif
-INCS+= aes.h aes_locl.h
+INCS+= aes.h
 
 # asn1
-SRCS+= a_bitstr.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c \
-       a_dup.c a_enum.c a_gentm.c a_i2d_fp.c a_int.c \
-       a_mbstr.c a_object.c a_octet.c a_print.c \
-       a_set.c a_sign.c a_strex.c a_strnid.c a_time.c a_type.c \
-       a_utctm.c a_utf8.c a_verify.c ameth_lib.c asn1_err.c asn1_lib.c \
-       asn1_gen.c asn1_par.c asn_mime.c \
-       asn_moid.c asn_pack.c bio_asn1.c bio_ndef.c d2i_pr.c d2i_pu.c \
-       evp_asn1.c f_enum.c f_int.c f_string.c i2d_pr.c i2d_pu.c \
-       n_pkey.c nsseq.c p5_pbe.c p5_pbev2.c p8_pkey.c t_bitst.c \
-       t_crl.c t_pkey.c t_req.c t_spki.c t_x509.c t_x509a.c \
-       tasn_dec.c tasn_enc.c tasn_fre.c tasn_new.c tasn_prn.c tasn_typ.c \
-       tasn_utl.c x_algor.c x_attrib.c x_bignum.c x_crl.c \
-       x_exten.c x_info.c x_long.c x_name.c x_nx509.c x_pkey.c x_pubkey.c \
-       x_req.c x_sig.c x_spki.c x_val.c x_x509.c x_x509a.c
+SRCS+= a_object.c a_bitstr.c a_utctm.c a_gentm.c a_time.c a_int.c a_octet.c \
+       a_print.c a_type.c a_set.c a_dup.c a_d2i_fp.c a_i2d_fp.c \
+       a_enum.c a_utf8.c a_sign.c a_digest.c a_verify.c a_mbstr.c a_strex.c \
+       x_algor.c x_val.c x_pubkey.c x_sig.c x_req.c x_attrib.c x_bignum.c \
+       x_long.c x_name.c x_x509.c x_x509a.c x_crl.c x_info.c x_spki.c nsseq.c \
+       x_nx509.c d2i_pu.c d2i_pr.c i2d_pu.c i2d_pr.c \
+       t_req.c t_x509.c t_x509a.c t_crl.c t_pkey.c t_spki.c t_bitst.c \
+       tasn_new.c tasn_fre.c tasn_enc.c tasn_dec.c tasn_utl.c tasn_typ.c \
+       tasn_prn.c ameth_lib.c \
+       f_int.c f_string.c n_pkey.c \
+       f_enum.c x_pkey.c a_bool.c x_exten.c bio_asn1.c bio_ndef.c asn_mime.c \
+       asn1_gen.c asn1_par.c asn1_lib.c asn1_err.c a_bytes.c a_strnid.c \
+       evp_asn1.c asn_pack.c p5_pbe.c p5_pbev2.c p8_pkey.c asn_moid.c
 INCS+= asn1.h asn1_mac.h asn1t.h
 
 # bf
-SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
+SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= bf-586.s
 .elif ${MACHINE_ARCH} == "x86_64"
@@ -66,33 +85,33 @@ SRCS+=      bf_enc.c
 INCS+= blowfish.h
 
 # bio
-SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_lbuf.c bf_nbio.c \
-       bf_null.c bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c \
-       bss_conn.c bss_dgram.c bss_fd.c bss_file.c bss_log.c bss_mem.c \
-       bss_null.c bss_sock.c
+SRCS+= bio_lib.c bio_cb.c bio_err.c \
+       bss_mem.c bss_null.c bss_fd.c \
+       bss_file.c bss_sock.c bss_conn.c \
+       bf_null.c bf_buff.c b_print.c b_dump.c \
+       b_sock.c bss_acpt.c bf_nbio.c bss_log.c bss_bio.c \
+       bss_dgram.c
 INCS+= bio.h
 
 # bn
-SRCS+= bn_add.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c \
-       bn_div.c bn_err.c bn_exp.c \
-       bn_exp2.c bn_gcd.c bn_gf2m.c bn_kron.c bn_lib.c bn_mod.c bn_mont.c \
-       bn_mpi.c bn_mul.c bn_nist.c \
-       bn_prime.c bn_print.c bn_rand.c bn_recp.c \
-       bn_shift.c bn_sqr.c bn_sqrt.c bn_word.c
+SRCS+= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
+       bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+       bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
+       bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
+       bn_depr.c bn_const.c bn_x931p.c
 .if ${MACHINE_ARCH} == "i386"
-SRCS+= bn-586.s co-586.s x86-mont.s
+SRCS+= bn-586.s co-586.s x86-mont.s x86-gf2m.s
 .elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= x86_64-gcc.c x86_64-mont.s
+SRCS+= x86_64-gcc.c x86_64-mont.s x86_64-mont5.s x86_64-gf2m.s modexp512-x86_64.s
 .endif
 INCS+= bn.h
 
 # buffer
-SRCS+= buf_err.c buffer.c
+SRCS+= buffer.c buf_str.c buf_err.c
 INCS+= buffer.h
 
 # camellia
-SRCS+= cmll_cfb.c \
-       cmll_ctr.c cmll_ecb.c cmll_ofb.c
+SRCS+= cmll_ecb.c cmll_ofb.c cmll_cfb.c cmll_ctr.c cmll_utl.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= cmll-x86.s
 .elif ${MACHINE_ARCH} == "x86_64"
@@ -101,29 +120,36 @@ SRCS+=    cmll-x86_64.s cmll_misc.c
 INCS+= camellia.h
 
 # cast
-SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c
+SRCS+= c_skey.c c_ecb.c c_enc.c c_cfb64.c c_ofb64.c
 INCS+= cast.h
 
+# cmac
+SRCS+= cmac.c cm_ameth.c cm_pmeth.c
+INCS+= cmac.h
+
 # cms
-SRCS+= cms_asn1.c cms_att.c cms_cd.c cms_dd.c cms_enc.c cms_env.c cms_err.c \
-       cms_ess.c cms_io.c cms_lib.c cms_sd.c cms_smime.c
+SRCS+= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
+       cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \
+       cms_pwri.c
 INCS+= cms.h
 
 # comp
-SRCS+= c_rle.c c_zlib.c comp_err.c comp_lib.c
+SRCS+= comp_lib.c comp_err.c \
+       c_rle.c c_zlib.c
 INCS+= comp.h
 
 # conf
-SRCS+= conf_api.c conf_def.c conf_err.c conf_lib.c conf_mall.c conf_mod.c conf_sap.c
+SRCS+= conf_err.c conf_lib.c conf_api.c conf_def.c conf_mod.c \
+       conf_mall.c conf_sap.c
 INCS+= conf.h conf_api.h
 
 # des
-SRCS+= cbc3_enc.c cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
-       des_old.c des_old2.c \
-       ecb3_enc.c ecb_enc.c ede_cbcm_enc.c \
-       enc_read.c enc_writ.c fcrypt.c ofb64ede.c ofb64enc.c \
-       ofb_enc.c pcbc_enc.c qud_cksm.c rand_key.c read2pwd.c \
-       rpc_enc.c set_key.c str2key.c xcbc_enc.c
+SRCS+= set_key.c  ecb_enc.c  cbc_enc.c \
+       ecb3_enc.c cfb64enc.c cfb64ede.c cfb_enc.c  ofb64ede.c \
+       enc_read.c enc_writ.c ofb64enc.c \
+       ofb_enc.c  str2key.c  pcbc_enc.c qud_cksm.c rand_key.c \
+       fcrypt.c xcbc_enc.c rpc_enc.c  cbc_cksm.c \
+       ede_cbcm_enc.c des_old.c des_old2.c read2pwd.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= des-586.s crypt586.s
 .elif ${MACHINE_ARCH} == "x86_64"
@@ -132,14 +158,13 @@ SRCS+=    des_enc.c fcrypt_b.c
 INCS+= des.h des_old.h
 
 # dh
-SRCS+= dh_ameth.c dh_asn1.c dh_check.c dh_depr.c \
-       dh_err.c dh_gen.c dh_key.c dh_lib.c dh_pmeth.c dh_prn.c
+SRCS+= dh_asn1.c dh_gen.c dh_key.c dh_lib.c dh_check.c dh_err.c dh_depr.c \
+       dh_ameth.c dh_pmeth.c dh_prn.c
 INCS+= dh.h
 
 # dsa
-SRCS+= dsa_ameth.c dsa_asn1.c dsa_depr.c dsa_err.c \
-       dsa_gen.c dsa_key.c dsa_lib.c \
-       dsa_ossl.c dsa_pmeth.c dsa_prn.c dsa_sign.c dsa_vrf.c
+SRCS+= dsa_gen.c dsa_key.c dsa_lib.c dsa_asn1.c dsa_vrf.c dsa_sign.c \
+       dsa_err.c dsa_ossl.c dsa_depr.c dsa_ameth.c dsa_pmeth.c dsa_prn.c
 INCS+= dsa.h
 
 # dso
@@ -147,64 +172,63 @@ SRCS+=    dso_dl.c dso_dlfcn.c dso_err.c dso_lib.c dso_null.c dso_openssl.c
 INCS+= dso.h
 
 # ec
-SRCS+= ec_ameth.c ec_asn1.c ec_check.c ec_curve.c \
-       ec_cvt.c ec_err.c ec_key.c ec_lib.c \
-       ec_mult.c ec_pmeth.c ec_print.c ec2_smpl.c ec2_mult.c \
-       eck_prn.c ecp_mont.c ecp_nist.c \
-       ecp_smpl.c
+SRCS+= ec_lib.c ecp_smpl.c ecp_mont.c ecp_nist.c ec_cvt.c ec_mult.c \
+       ec_err.c ec_curve.c ec_check.c ec_print.c ec_asn1.c ec_key.c \
+       ec2_smpl.c ec2_mult.c ec_ameth.c ec_pmeth.c eck_prn.c \
+       ecp_nistp224.c ecp_nistp256.c ecp_nistp521.c ecp_nistputil.c \
+       ecp_oct.c ec2_oct.c ec_oct.c
 INCS+= ec.h
 
 # ecdh
-SRCS+= ech_err.c ech_key.c ech_lib.c ech_ossl.c
+SRCS+= ech_lib.c ech_ossl.c ech_key.c ech_err.c
 INCS+= ecdh.h
 
 # ecdsa
-SRCS+= ecs_asn1.c ecs_err.c ecs_lib.c ecs_ossl.c ecs_sign.c ecs_vrf.c
+SRCS+= ecs_lib.c ecs_asn1.c ecs_ossl.c ecs_sign.c ecs_vrf.c ecs_err.c
 INCS+= ecdsa.h
 
 # engine
 CFLAGS+=-DHAVE_CRYPTODEV
-SRCS+= eng_all.c eng_cnf.c eng_cryptodev.c eng_ctrl.c eng_dyn.c eng_err.c \
-       eng_fat.c eng_init.c eng_lib.c eng_list.c eng_openssl.c \
-       eng_pkey.c eng_table.c tb_asnmth.c tb_cipher.c tb_dh.c \
-       tb_digest.c tb_dsa.c tb_ecdh.c tb_ecdsa.c \
-       tb_pkmeth.c tb_rand.c tb_rsa.c tb_store.c
-INCS+= eng_int.h engine.h
+SRCS+= eng_err.c eng_lib.c eng_list.c eng_init.c eng_ctrl.c \
+       eng_table.c eng_pkey.c eng_fat.c eng_all.c \
+       tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
+       tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
+       eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
+       eng_rsax.c eng_rdrand.c
+INCS+= engine.h
 
 # err
 SRCS+= err.c err_all.c err_prn.c
 INCS+= err.h
 
 # evp
-SRCS+= bio_b64.c bio_enc.c bio_md.c bio_ok.c c_all.c c_allc.c c_alld.c \
-       digest.c e_aes.c e_bf.c e_camellia.c e_cast.c e_des.c e_des3.c \
-       e_idea.c e_null.c e_rc2.c e_rc4.c e_rc5.c e_seed.c e_xcbc_d.c \
-       encode.c \
-       evp_acnf.c evp_enc.c evp_err.c evp_key.c evp_lib.c evp_pbe.c \
-       evp_pkey.c m_dss.c m_dss1.c m_ecdsa.c m_md2.c m_md4.c m_md5.c \
-       m_mdc2.c m_null.c m_ripemd.c m_sha.c m_sha1.c \
-       m_sigver.c m_wp.c names.c openbsd_hw.c \
-       p5_crpt.c p5_crpt2.c p_dec.c p_enc.c p_lib.c p_open.c p_seal.c \
-       p_sign.c p_verify.c pmeth_fn.c pmeth_gn.c pmeth_lib.c
+SRCS+= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c \
+       e_des.c e_bf.c e_idea.c e_des3.c e_camellia.c\
+       e_rc4.c e_aes.c names.c e_seed.c \
+       e_xcbc_d.c e_rc2.c e_cast.c e_rc5.c \
+       m_null.c m_md2.c m_md4.c m_md5.c m_sha.c m_sha1.c m_wp.c \
+       m_dss.c m_dss1.c m_mdc2.c m_ripemd.c m_ecdsa.c\
+       p_open.c p_seal.c p_sign.c p_verify.c p_lib.c p_enc.c p_dec.c \
+       bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \
+       c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
+       evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \
+       e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \
+       e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c
 INCS+= evp.h
 
 # hmac
-SRCS+= hm_ameth.c hm_pmeth.c hmac.c
+SRCS+= hmac.c hm_ameth.c hm_pmeth.c
 INCS+= hmac.h
 
 # idea
 .if defined(WANT_IDEA)
-SRCS+= i_cbc.c i_cfb64.c i_ecb.c i_ofb64.c i_skey.c
+SRCS+= i_cbc.c i_cfb64.c i_ofb64.c i_ecb.c i_skey.c
 INCS+= idea.h
 _ideapath=     ${LCRYPTO_SRC}/crypto/idea
 .endif
 
-# krb5
-#SRCS+=        krb5_asn.c
-#INCS+=        krb5_asn.h
-
 # lhash
-SRCS+= lh_stats.c lhash.c
+SRCS+= lhash.c lh_stats.c
 INCS+= lhash.h
 
 # md2
@@ -225,36 +249,42 @@ SRCS+=    md5-x86_64.s
 INCS+= md5.h
 
 # mdc2
-SRCS+= mdc2_one.c mdc2dgst.c
+SRCS+= mdc2dgst.c mdc2_one.c
 INCS+= mdc2.h
 
 # modes
-SRCS+= cbc128.c cfb128.c ctr128.c cts128.c ofb128.c
+SRCS+= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
+       ccm128.c xts128.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= ghash-x86.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= ghash-x86_64.s
+.endif
 INCS+= modes.h
 
 # objects
-SRCS+= o_names.c obj_dat.c obj_err.c obj_lib.c obj_xref.c
+SRCS+= o_names.c obj_dat.c obj_lib.c obj_err.c obj_xref.c
 INCS+= objects.h obj_mac.h
 
 # ocsp
-SRCS+= ocsp_asn.c ocsp_cl.c ocsp_err.c ocsp_ext.c ocsp_ht.c \
-       ocsp_lib.c ocsp_prn.c ocsp_srv.c ocsp_vfy.c
+SRCS+= ocsp_asn.c ocsp_ext.c ocsp_ht.c ocsp_lib.c ocsp_cl.c \
+       ocsp_srv.c ocsp_prn.c ocsp_vfy.c ocsp_err.c
 INCS+= ocsp.h
 
 # pem
-SRCS+= pem_all.c pem_err.c pem_info.c pem_lib.c pem_oth.c pem_pk8.c \
-       pem_pkey.c pem_seal.c pem_sign.c pem_x509.c pem_xaux.c pvkfmt.c
+SRCS+= pem_sign.c pem_seal.c pem_info.c pem_lib.c pem_all.c pem_err.c \
+       pem_x509.c pem_xaux.c pem_oth.c pem_pk8.c pem_pkey.c pvkfmt.c
 INCS+= pem.h pem2.h
 
 # pkcs12
-SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c \
-       p12_decr.c p12_init.c p12_key.c p12_kiss.c p12_mutl.c \
-       p12_npas.c p12_p8d.c p12_p8e.c p12_utl.c pk12err.c
+SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c p12_decr.c \
+       p12_init.c p12_key.c p12_kiss.c p12_mutl.c \
+       p12_utl.c p12_npas.c pk12err.c p12_p8d.c p12_p8e.c
 INCS+= pkcs12.h
 
 # pkcs7
-SRCS+= bio_pk7.c example.c pk7_asn1.c pk7_attr.c pk7_dgst.c pk7_doit.c \
-       pk7_lib.c pk7_mime.c pk7_smime.c pkcs7err.c
+SRCS+= pk7_asn1.c pk7_lib.c pkcs7err.c pk7_doit.c pk7_smime.c pk7_attr.c \
+       pk7_mime.c bio_pk7.c
 INCS+= pkcs7.h
 
 # pqueue
@@ -262,26 +292,26 @@ SRCS+=    pqueue.c
 INCS+= pqueue.h
 
 # rand
-SRCS+= md_rand.c rand_egd.c \
-       rand_err.c rand_lib.c rand_nw.c rand_unix.c \
-       randfile.c
+SRCS+= md_rand.c randfile.c rand_lib.c rand_err.c rand_egd.c \
+       rand_unix.c
 INCS+= rand.h
 
 # rc2
-SRCS+= rc2_cbc.c rc2_ecb.c rc2_skey.c rc2cfb64.c rc2ofb64.c
+SRCS+= rc2_ecb.c rc2_skey.c rc2_cbc.c rc2cfb64.c rc2ofb64.c
 INCS+= rc2.h
 
 # rc4
+SRCS+= rc4_utl.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= rc4-586.s
 .elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= rc4-x86_64.s
+SRCS+= rc4-x86_64.s rc4-md5-x86_64.s
 .endif
 INCS+= rc4.h
 
 # rc5
 # .. is patented, so don't compile by default
-#SRCS+=        rc5_ecb.c rc5_enc.c rc5_skey.c rc5cfb64.c rc5ofb64.c
+#SRCS+=        rc5_skey.c rc5_ecb.c rc5_enc.c rc5cfb64.c rc5ofb64.c
 #INCS+=        rc5.h
 
 # ripemd
@@ -292,19 +322,18 @@ SRCS+=    rmd-586.s
 INCS+= ripemd.h
 
 # rsa
-SRCS+= rsa_ameth.c rsa_asn1.c rsa_chk.c rsa_depr.c rsa_eay.c \
-       rsa_err.c rsa_gen.c \
-       rsa_lib.c rsa_none.c rsa_null.c rsa_oaep.c \
-       rsa_pk1.c rsa_pmeth.c rsa_prn.c rsa_pss.c \
-       rsa_saos.c rsa_sign.c rsa_ssl.c rsa_x931.c
+SRCS+= rsa_eay.c rsa_gen.c rsa_lib.c rsa_sign.c rsa_saos.c rsa_err.c \
+       rsa_pk1.c rsa_ssl.c rsa_none.c rsa_oaep.c rsa_chk.c rsa_null.c \
+       rsa_pss.c rsa_x931.c rsa_asn1.c rsa_depr.c rsa_ameth.c rsa_prn.c \
+       rsa_pmeth.c rsa_crpt.c
 INCS+= rsa.h
 
 # seed
-SRCS+= seed.c seed_cbc.c seed_cfb.c seed_ecb.c seed_ofb.c
+SRCS+= seed.c seed_ecb.c seed_cbc.c seed_cfb.c seed_ofb.c
 INCS+= seed.h
 
 # sha
-SRCS+= sha1_one.c sha1dgst.c sha256.c sha512.c sha_dgst.c sha_one.c
+SRCS+= sha_dgst.c sha1dgst.c sha_one.c sha1_one.c sha256.c sha512.c
 .if ${MACHINE_ARCH} == "i386"
 SRCS+= sha1-586.s sha256-586.s sha512-586.s
 .elif ${MACHINE_ARCH} == "x86_64"
@@ -312,6 +341,10 @@ SRCS+=     sha1-x86_64.s sha256-x86_64.s sha512-x86_64.s
 .endif
 INCS+= sha.h
 
+# srp
+SRCS+= srp_lib.c srp_vfy.c
+INCS+= srp.h
+
 # stack
 SRCS+= stack.c
 INCS+= stack.h safestack.h
@@ -321,9 +354,9 @@ INCS+=      stack.h safestack.h
 #INCS+=        store.h
 
 # ts
-SRCS+= ts_asn1.c ts_conf.c ts_err.c ts_lib.c \
-       ts_req_print.c ts_req_utils.c ts_rsp_print.c ts_rsp_sign.c \
-       ts_rsp_utils.c ts_rsp_verify.c ts_verify_ctx.c
+SRCS+= ts_err.c ts_req_utils.c ts_req_print.c ts_rsp_utils.c ts_rsp_print.c \
+       ts_rsp_sign.c ts_rsp_verify.c ts_verify_ctx.c ts_lib.c ts_conf.c \
+       ts_asn1.c
 INCS+= ts.h
 
 # txt_db
@@ -331,8 +364,8 @@ SRCS+=      txt_db.c
 INCS+= txt_db.h
 
 # ui
-SRCS+= ui_compat.c ui_err.c ui_lib.c ui_openssl.c ui_util.c
-INCS+= ui.h ui_compat.h ui_locl.h
+SRCS+= ui_err.c ui_lib.c ui_openssl.c ui_util.c ui_compat.c
+INCS+= ui.h ui_compat.h
 
 # whrlpool
 SRCS+= wp_dgst.c
@@ -344,97 +377,80 @@ SRCS+=    wp-x86_64.s
 INCS+= whrlpool.h
 
 # x509
-SRCS+= by_dir.c by_file.c x509_att.c x509_cmp.c x509_d2.c \
-       x509_def.c x509_err.c x509_ext.c x509_lu.c x509_obj.c \
-       x509_r2x.c x509_req.c x509_set.c x509_trs.c x509_txt.c \
-       x509_v3.c x509_vfy.c x509_vpm.c x509cset.c x509name.c x509rset.c \
-       x509spki.c x509type.c x_all.c
+SRCS+= x509_def.c x509_d2.c x509_r2x.c x509_cmp.c \
+       x509_obj.c x509_req.c x509spki.c x509_vfy.c \
+       x509_set.c x509cset.c x509rset.c x509_err.c \
+       x509name.c x509_v3.c x509_ext.c x509_att.c \
+       x509type.c x509_lu.c x_all.c x509_txt.c \
+       x509_trs.c by_file.c by_dir.c x509_vpm.c
 INCS+= x509.h x509_vfy.h
 
 # x509v3
-SRCS+= pcy_cache.c pcy_data.c pcy_lib.c pcy_map.c pcy_node.c pcy_tree.c \
-       v3_addr.c v3_akey.c v3_akeya.c v3_asid.c v3_alt.c v3_bcons.c v3_bitst.c \
-       v3_conf.c v3_cpols.c v3_crld.c v3_enum.c v3_extku.c \
-       v3_genn.c v3_ia5.c v3_info.c v3_int.c v3_lib.c v3_ncons.c v3_ocsp.c \
-       v3_pci.c v3_pcia.c v3_pcons.c v3_pmaps.c v3_pku.c v3_prn.c v3_purp.c \
-       v3_skey.c v3_sxnet.c v3_utl.c v3err.c
+SRCS+= v3_bcons.c v3_bitst.c v3_conf.c v3_extku.c v3_ia5.c v3_lib.c \
+       v3_prn.c v3_utl.c v3err.c v3_genn.c v3_alt.c v3_skey.c v3_akey.c v3_pku.c \
+       v3_int.c v3_enum.c v3_sxnet.c v3_cpols.c v3_crld.c v3_purp.c v3_info.c \
+       v3_ocsp.c v3_akeya.c v3_pmaps.c v3_pcons.c v3_ncons.c v3_pcia.c v3_pci.c \
+       pcy_cache.c pcy_node.c pcy_data.c pcy_map.c pcy_tree.c pcy_lib.c \
+       v3_asid.c v3_addr.c
 INCS+= x509v3.h
 
-SRCS+= buildinf.h
-INCS+= opensslconf.h
-INCSDIR=       ${INCLUDEDIR}/openssl
-
-CLEANFILES+=   buildinf.h opensslconf.h
-
-buildinf.h:
-       ( echo "#ifndef MK1MF_BUILD"; \
-       echo "  /* auto-generated by crypto/Makefile.ssl for crypto/cversion.c */"; \
-       echo "  #define CFLAGS \"$(CC)\""; \
-       echo "  #define PLATFORM \"`uname -s`-`uname -m`\""; \
-       echo "  #define DATE \"`LC_ALL=C date`\""; \
-       echo "#endif" ) > ${.TARGET}
-
-opensslconf.h: opensslconf-${MACHINE_ARCH}.h
-.if defined(WANT_IDEA)
-       sed '/^# define OPENSSL_NO_IDEA$$/d;/^#  define NO_IDEA$$/d' ${.ALLSRC} > ${.TARGET}
-.else
-       cp ${.ALLSRC} ${.TARGET}
-.endif
-
 .include <bsd.lib.mk>
 
+# The crypto subdirs are listed in the order of the vendor's Makefile
+# to aid future imports.
 .PATH: \
        ${.CURDIR}/asm \
        ${LCRYPTO_SRC}/crypto \
+       ${LCRYPTO_SRC}/crypto/objects \
+       ${LCRYPTO_SRC}/crypto/md4 \
+       ${LCRYPTO_SRC}/crypto/md5 \
+       ${LCRYPTO_SRC}/crypto/sha \
+       ${LCRYPTO_SRC}/crypto/mdc2 \
+       ${LCRYPTO_SRC}/crypto/hmac \
+       ${LCRYPTO_SRC}/crypto/ripemd \
+       ${LCRYPTO_SRC}/crypto/whrlpool \
+       ${LCRYPTO_SRC}/crypto/des \
        ${LCRYPTO_SRC}/crypto/aes \
-       ${LCRYPTO_SRC}/crypto/asn1 \
+       ${LCRYPTO_SRC}/crypto/rc2 \
+       ${LCRYPTO_SRC}/crypto/rc4 \
        ${LCRYPTO_SRC}/crypto/bf \
-       ${LCRYPTO_SRC}/crypto/bio \
+       ${LCRYPTO_SRC}/crypto/cast \
+       ${LCRYPTO_SRC}/crypto/camellia \
+       ${LCRYPTO_SRC}/crypto/seed \
+       ${LCRYPTO_SRC}/crypto/modes \
        ${LCRYPTO_SRC}/crypto/bn \
        ${LCRYPTO_SRC}/crypto/bn/asm \
-       ${LCRYPTO_SRC}/crypto/buffer \
-       ${LCRYPTO_SRC}/crypto/camellia \
-       ${LCRYPTO_SRC}/crypto/cast \
-       ${LCRYPTO_SRC}/crypto/cms \
-       ${LCRYPTO_SRC}/crypto/comp \
-       ${LCRYPTO_SRC}/crypto/conf \
-       ${LCRYPTO_SRC}/crypto/des \
-       ${LCRYPTO_SRC}/crypto/dh \
-       ${LCRYPTO_SRC}/crypto/dsa \
-       ${LCRYPTO_SRC}/crypto/dso \
        ${LCRYPTO_SRC}/crypto/ec \
-       ${LCRYPTO_SRC}/crypto/ecdh \
+       ${LCRYPTO_SRC}/crypto/rsa \
+       ${LCRYPTO_SRC}/crypto/dsa \
        ${LCRYPTO_SRC}/crypto/ecdsa \
+       ${LCRYPTO_SRC}/crypto/dh \
+       ${LCRYPTO_SRC}/crypto/ecdh \
+       ${LCRYPTO_SRC}/crypto/dso \
        ${LCRYPTO_SRC}/crypto/engine \
+       ${LCRYPTO_SRC}/crypto/buffer \
+       ${LCRYPTO_SRC}/crypto/bio \
+       ${LCRYPTO_SRC}/crypto/stack \
+       ${LCRYPTO_SRC}/crypto/lhash \
+       ${LCRYPTO_SRC}/crypto/rand \
        ${LCRYPTO_SRC}/crypto/err \
        ${LCRYPTO_SRC}/crypto/evp \
-       ${LCRYPTO_SRC}/crypto/hmac \
-       ${_ideapath} \
-       ${LCRYPTO_SRC}/crypto/lhash \
-       ${LCRYPTO_SRC}/crypto/md4 \
-       ${LCRYPTO_SRC}/crypto/md5 \
-       ${LCRYPTO_SRC}/crypto/mdc2 \
-       ${LCRYPTO_SRC}/crypto/modes \
-       ${LCRYPTO_SRC}/crypto/objects \
-       ${LCRYPTO_SRC}/crypto/ocsp \
+       ${LCRYPTO_SRC}/crypto/asn1 \
        ${LCRYPTO_SRC}/crypto/pem \
-       ${LCRYPTO_SRC}/crypto/pkcs12 \
+       ${LCRYPTO_SRC}/crypto/x509 \
+       ${LCRYPTO_SRC}/crypto/x509v3 \
+       ${LCRYPTO_SRC}/crypto/conf \
+       ${LCRYPTO_SRC}/crypto/txt_db \
        ${LCRYPTO_SRC}/crypto/pkcs7 \
+       ${LCRYPTO_SRC}/crypto/pkcs12 \
+       ${LCRYPTO_SRC}/crypto/comp \
+       ${LCRYPTO_SRC}/crypto/ocsp \
+       ${LCRYPTO_SRC}/crypto/ui \
+       ${LCRYPTO_SRC}/crypto/cms \
        ${LCRYPTO_SRC}/crypto/pqueue \
-       ${LCRYPTO_SRC}/crypto/rand \
-       ${LCRYPTO_SRC}/crypto/rc2 \
-       ${LCRYPTO_SRC}/crypto/rc4 \
-       ${LCRYPTO_SRC}/crypto/ripemd \
-       ${LCRYPTO_SRC}/crypto/rsa \
-       ${LCRYPTO_SRC}/crypto/seed \
-       ${LCRYPTO_SRC}/crypto/sha \
-       ${LCRYPTO_SRC}/crypto/stack \
-       ${LCRYPTO_SRC}/crypto/threads \
        ${LCRYPTO_SRC}/crypto/ts \
-       ${LCRYPTO_SRC}/crypto/txt_db \
-       ${LCRYPTO_SRC}/crypto/ui \
-       ${LCRYPTO_SRC}/crypto/whrlpool \
-       ${LCRYPTO_SRC}/crypto/x509 \
-       ${LCRYPTO_SRC}/crypto/x509v3 \
+       ${LCRYPTO_SRC}/crypto/srp \
+       ${LCRYPTO_SRC}/crypto/cmac \
+       ${_ideapath} \
        ${LCRYPTO_SRC} \
        ${.CURDIR}/man
index 81679b2..c6fdf6b 100644 (file)
@@ -1,20 +1,25 @@
 # $FreeBSD: src/secure/lib/libcrypto/Makefile.inc,v 1.7.2.11 2003/02/20 15:07:32 nectar Exp $
 # $DragonFly: src/secure/lib/libcrypto/Makefile.inc,v 1.18 2008/09/27 21:04:45 pavalos Exp $
 
-OSSLVERSION=   1.0.0g
-OSSLDATE=      2012-01-18
+OSSLVERSION=   1.0.1
+OSSLDATE=      2012-03-14
 LCRYPTO_SRC=   ${.CURDIR}/../../../crypto/openssl
 LCRYPTO_DOC=   ${LCRYPTO_SRC}/doc
 
 CFLAGS+=       -DDSO_DLFCN -DHAVE_DLFCN_H -DL_ENDIAN -DTERMIOS
 CFLAGS+=       -DOPENSSL_THREADS
-CFLAGS+=       -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT -DSHA1_ASM \
-               -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DWHIRLPOOL_ASM
+CFLAGS+=       -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT \
+               -DOPENSSL_BN_ASM_GF2m -DSHA1_ASM \
+               -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DVPAES_ASM \
+               -DWHIRLPOOL_ASM -DGHASH_ASM
 .if ${MACHINE_ARCH} == "i386"
-CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS -DRMD160_ASM
+CFLAGS+=       -DOPENSSL_BN_ASM_PART_WORDS -DRMD160_ASM
+.elif ${MACHINE_ARCH} == "x86_64"
+CFLAGS+=       -DOPENSSL_BN_ASM_MONT5 -DBSAES_ASM
 .endif
 CFLAGS+=       -I${LCRYPTO_SRC} -I${LCRYPTO_SRC}/crypto \
                -I${LCRYPTO_SRC}/crypto/asn1 -I${LCRYPTO_SRC}/crypto/evp \
+               -I${LCRYPTO_SRC}/crypto/modes \
                -I${LCRYPTO_SRC}/crypto/engine -I${.OBJDIR}
 
 MANDIR=        ${SHAREDIR}/openssl/man/man
index 9bba3dd..0838b56 100644 (file)
@@ -11,6 +11,7 @@ OPENSSL_SRC=  ../../../../crypto/openssl
        ${OPENSSL_SRC}/crypto/camellia/asm \
        ${OPENSSL_SRC}/crypto/des/asm \
        ${OPENSSL_SRC}/crypto/md5/asm \
+       ${OPENSSL_SRC}/crypto/modes/asm \
        ${OPENSSL_SRC}/crypto/perlasm \
        ${OPENSSL_SRC}/crypto/rc4/asm \
        ${OPENSSL_SRC}/crypto/ripemd/asm \
@@ -21,63 +22,70 @@ OPENSSL_SRC=        ../../../../crypto/openssl
 # cpuid
 SRCS=  x86cpuid.pl
 
-# aes
-SRCS+= aes-586.pl
-
-# bf
-SRCS+= bf-586.pl
-
 # bn
-SRCS+= bn-586.pl co-586.pl x86-mont.pl
-
-# camellia
-SRCS+= cmll-x86.pl
+SRCS+= bn-586.pl co-586.pl x86-mont.pl x86-gf2m.pl
 
 # des
 SRCS+= des-586.pl crypt586.pl
 
-# md5
-SRCS+= md5-586.pl
+# aes
+SRCS+= aes-586.pl vpaes-x86.pl aesni-x86.pl
+
+# bf
+SRCS+= bf-586.pl
 
 # rc4
 SRCS+= rc4-586.pl
 
-# ripemd
-SRCS+= rmd-586.pl
+# md5
+SRCS+= md5-586.pl
 
 # sha
 SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl
 
+# ripemd
+SRCS+= rmd-586.pl
+
 # whrlpool
 SRCS+= wp-mmx.pl
 
+# camellia
+SRCS+= cmll-x86.pl
+
+# modes
+SRCS+= ghash-x86.pl
+
 PERLFLAGS=     ${CFLAGS}
 
 .elif ${MACHINE_ARCH} == "x86_64"
 # cpuid
 SRCS=  x86_64cpuid.pl
 
-# aes
-SRCS+= aes-x86_64.pl
-
 # bn
-SRCS+= x86_64-mont.pl
+SRCS+= x86_64-mont.pl x86_64-mont5.pl x86_64-gf2m.pl modexp512-x86_64.pl
 
-# camellia
-SRCS+= cmll-x86_64.pl
+# aes
+SRCS+= aes-x86_64.pl vpaes-x86_64.pl bsaes-x86_64.pl \
+       aesni-x86_64.pl aesni-sha1-x86_64.pl
+
+# rc4
+SRCS+= rc4-x86_64.pl rc4-md5-x86_64.pl
 
 # md5
 SRCS+= md5-x86_64.pl
 
-# rc4
-SRCS+= rc4-x86_64.pl
-
 # sha
 SRCS+= sha1-x86_64.pl sha256-x86_64.s sha512-x86_64.pl
 
 # whrlpool
 SRCS+= wp-x86_64.pl
 
+# camellia
+SRCS+= cmll-x86_64.pl
+
+# modes
+SRCS+= ghash-x86_64.pl
+
 PERLFLAGS=
 .endif
 
index dca6d0d..f69b7d5 100644 (file)
@@ -2988,19 +2988,19 @@ _x86_AES_set_encrypt_key:
        popl    %ebp
        ret
 .size  _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
-.globl AES_set_encrypt_key
-.type  AES_set_encrypt_key,@function
+.globl private_AES_set_encrypt_key
+.type  private_AES_set_encrypt_key,@function
 .align 16
-AES_set_encrypt_key:
-.L_AES_set_encrypt_key_begin:
+private_AES_set_encrypt_key:
+.L_private_AES_set_encrypt_key_begin:
        call    _x86_AES_set_encrypt_key
        ret
-.size  AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
-.globl AES_set_decrypt_key
-.type  AES_set_decrypt_key,@function
+.size  private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin
+.globl private_AES_set_decrypt_key
+.type  private_AES_set_decrypt_key,@function
 .align 16
-AES_set_decrypt_key:
-.L_AES_set_decrypt_key_begin:
+private_AES_set_decrypt_key:
+.L_private_AES_set_decrypt_key_begin:
        call    _x86_AES_set_encrypt_key
        cmpl    $0,%eax
        je      .L054proceed
@@ -3229,8 +3229,8 @@ AES_set_decrypt_key:
        popl    %ebx
        popl    %ebp
        ret
-.size  AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
+.size  private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin
 .byte  65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
 .byte  80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
 .byte  111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm  OPENSSL_ia32cap_P,4,4
+.comm  OPENSSL_ia32cap_P,8,4
index d000a57..e385566 100644 (file)
@@ -330,6 +330,9 @@ _x86_64_AES_encrypt_compact:
 .globl AES_encrypt
 .type  AES_encrypt,@function
 .align 16
+.globl asm_AES_encrypt
+.hidden        asm_AES_encrypt
+asm_AES_encrypt:
 AES_encrypt:
        pushq   %rbx
        pushq   %rbp
@@ -775,6 +778,9 @@ _x86_64_AES_decrypt_compact:
 .globl AES_decrypt
 .type  AES_decrypt,@function
 .align 16
+.globl asm_AES_decrypt
+.hidden        asm_AES_decrypt
+asm_AES_decrypt:
 AES_decrypt:
        pushq   %rbx
        pushq   %rbp
@@ -838,10 +844,10 @@ AES_decrypt:
 .Ldec_epilogue:
        .byte   0xf3,0xc3
 .size  AES_decrypt,.-AES_decrypt
-.globl AES_set_encrypt_key
-.type  AES_set_encrypt_key,@function
+.globl private_AES_set_encrypt_key
+.type  private_AES_set_encrypt_key,@function
 .align 16
-AES_set_encrypt_key:
+private_AES_set_encrypt_key:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
@@ -862,7 +868,7 @@ AES_set_encrypt_key:
        addq    $56,%rsp
 .Lenc_key_epilogue:
        .byte   0xf3,0xc3
-.size  AES_set_encrypt_key,.-AES_set_encrypt_key
+.size  private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
 
 .type  _x86_64_AES_set_encrypt_key,@function
 .align 16
@@ -1103,10 +1109,10 @@ _x86_64_AES_set_encrypt_key:
 .Lexit:
 .byte  0xf3,0xc3                       
 .size  _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
-.globl AES_set_decrypt_key
-.type  AES_set_decrypt_key,@function
+.globl private_AES_set_decrypt_key
+.type  private_AES_set_decrypt_key,@function
 .align 16
-AES_set_decrypt_key:
+private_AES_set_decrypt_key:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
@@ -1289,11 +1295,14 @@ AES_set_decrypt_key:
        addq    $56,%rsp
 .Ldec_key_epilogue:
        .byte   0xf3,0xc3
-.size  AES_set_decrypt_key,.-AES_set_decrypt_key
+.size  private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
 .globl AES_cbc_encrypt
 .type  AES_cbc_encrypt,@function
 .align 16
 
+.globl asm_AES_cbc_encrypt
+.hidden        asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
 AES_cbc_encrypt:
        cmpq    $0,%rdx
        je      .Lcbc_epilogue
diff --git a/secure/lib/libcrypto/asm/aesni-sha1-x86_64.s b/secure/lib/libcrypto/asm/aesni-sha1-x86_64.s
new file mode 100644 (file)
index 0000000..32fd600
--- /dev/null
@@ -0,0 +1,1396 @@
+.text  
+
+
+.globl aesni_cbc_sha1_enc
+.type  aesni_cbc_sha1_enc,@function
+.align 16
+aesni_cbc_sha1_enc:
+
+       movl    OPENSSL_ia32cap_P+0(%rip),%r10d
+       movl    OPENSSL_ia32cap_P+4(%rip),%r11d
+       jmp     aesni_cbc_sha1_enc_ssse3
+       .byte   0xf3,0xc3
+.size  aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type  aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+aesni_cbc_sha1_enc_ssse3:
+       movq    8(%rsp),%r10
+
+
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       leaq    -104(%rsp),%rsp
+
+
+       movq    %rdi,%r12
+       movq    %rsi,%r13
+       movq    %rdx,%r14
+       movq    %rcx,%r15
+       movdqu  (%r8),%xmm11
+       movq    %r8,88(%rsp)
+       shlq    $6,%r14
+       subq    %r12,%r13
+       movl    240(%r15),%r8d
+       addq    %r10,%r14
+
+       leaq    K_XX_XX(%rip),%r11
+       movl    0(%r9),%eax
+       movl    4(%r9),%ebx
+       movl    8(%r9),%ecx
+       movl    12(%r9),%edx
+       movl    %ebx,%esi
+       movl    16(%r9),%ebp
+
+       movdqa  64(%r11),%xmm6
+       movdqa  0(%r11),%xmm9
+       movdqu  0(%r10),%xmm0
+       movdqu  16(%r10),%xmm1
+       movdqu  32(%r10),%xmm2
+       movdqu  48(%r10),%xmm3
+.byte  102,15,56,0,198
+       addq    $64,%r10
+.byte  102,15,56,0,206
+.byte  102,15,56,0,214
+.byte  102,15,56,0,222
+       paddd   %xmm9,%xmm0
+       paddd   %xmm9,%xmm1
+       paddd   %xmm9,%xmm2
+       movdqa  %xmm0,0(%rsp)
+       psubd   %xmm9,%xmm0
+       movdqa  %xmm1,16(%rsp)
+       psubd   %xmm9,%xmm1
+       movdqa  %xmm2,32(%rsp)
+       psubd   %xmm9,%xmm2
+       movups  (%r15),%xmm13
+       movups  16(%r15),%xmm14
+       jmp     .Loop_ssse3
+.align 16
+.Loop_ssse3:
+       movdqa  %xmm1,%xmm4
+       addl    0(%rsp),%ebp
+       movups  0(%r12),%xmm12
+       xorps   %xmm13,%xmm12
+       xorps   %xmm12,%xmm11
+.byte  102,69,15,56,220,222
+       movups  32(%r15),%xmm15
+       xorl    %edx,%ecx
+       movdqa  %xmm3,%xmm8
+.byte  102,15,58,15,224,8
+       movl    %eax,%edi
+       roll    $5,%eax
+       paddd   %xmm3,%xmm9
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       psrldq  $4,%xmm8
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       pxor    %xmm0,%xmm4
+       rorl    $2,%ebx
+       addl    %esi,%ebp
+       pxor    %xmm2,%xmm8
+       addl    4(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       pxor    %xmm8,%xmm4
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       movdqa  %xmm9,48(%rsp)
+       xorl    %ecx,%edi
+.byte  102,69,15,56,220,223
+       movups  48(%r15),%xmm14
+       addl    %ebp,%edx
+       movdqa  %xmm4,%xmm10
+       movdqa  %xmm4,%xmm8
+       rorl    $7,%eax
+       addl    %edi,%edx
+       addl    8(%rsp),%ecx
+       xorl    %ebx,%eax
+       pslldq  $12,%xmm10
+       paddd   %xmm4,%xmm4
+       movl    %edx,%edi
+       roll    $5,%edx
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       psrld   $31,%xmm8
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       movdqa  %xmm10,%xmm9
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       psrld   $30,%xmm10
+       por     %xmm8,%xmm4
+       addl    12(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%esi
+       roll    $5,%ecx
+.byte  102,69,15,56,220,222
+       movups  64(%r15),%xmm15
+       pslld   $2,%xmm9
+       pxor    %xmm10,%xmm4
+       andl    %ebp,%edi
+       xorl    %eax,%ebp
+       movdqa  0(%r11),%xmm10
+       xorl    %eax,%edi
+       addl    %ecx,%ebx
+       pxor    %xmm9,%xmm4
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       movdqa  %xmm2,%xmm5
+       addl    16(%rsp),%eax
+       xorl    %ebp,%edx
+       movdqa  %xmm4,%xmm9
+.byte  102,15,58,15,233,8
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       paddd   %xmm4,%xmm10
+       andl    %edx,%esi
+       xorl    %ebp,%edx
+       psrldq  $4,%xmm9
+       xorl    %ebp,%esi
+       addl    %ebx,%eax
+       pxor    %xmm1,%xmm5
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       pxor    %xmm3,%xmm9
+       addl    20(%rsp),%ebp
+.byte  102,69,15,56,220,223
+       movups  80(%r15),%xmm14
+       xorl    %edx,%ecx
+       movl    %eax,%esi
+       roll    $5,%eax
+       pxor    %xmm9,%xmm5
+       andl    %ecx,%edi
+       xorl    %edx,%ecx
+       movdqa  %xmm10,0(%rsp)
+       xorl    %edx,%edi
+       addl    %eax,%ebp
+       movdqa  %xmm5,%xmm8
+       movdqa  %xmm5,%xmm9
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       addl    24(%rsp),%edx
+       xorl    %ecx,%ebx
+       pslldq  $12,%xmm8
+       paddd   %xmm5,%xmm5
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       andl    %ebx,%esi
+       xorl    %ecx,%ebx
+       psrld   $31,%xmm9
+       xorl    %ecx,%esi
+.byte  102,69,15,56,220,222
+       movups  96(%r15),%xmm15
+       addl    %ebp,%edx
+       movdqa  %xmm8,%xmm10
+       rorl    $7,%eax
+       addl    %esi,%edx
+       psrld   $30,%xmm8
+       por     %xmm9,%xmm5
+       addl    28(%rsp),%ecx
+       xorl    %ebx,%eax
+       movl    %edx,%esi
+       roll    $5,%edx
+       pslld   $2,%xmm10
+       pxor    %xmm8,%xmm5
+       andl    %eax,%edi
+       xorl    %ebx,%eax
+       movdqa  16(%r11),%xmm8
+       xorl    %ebx,%edi
+       addl    %edx,%ecx
+       pxor    %xmm10,%xmm5
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       movdqa  %xmm3,%xmm6
+       addl    32(%rsp),%ebx
+       xorl    %eax,%ebp
+       movdqa  %xmm5,%xmm10
+.byte  102,15,58,15,242,8
+       movl    %ecx,%edi
+       roll    $5,%ecx
+.byte  102,69,15,56,220,223
+       movups  112(%r15),%xmm14
+       paddd   %xmm5,%xmm8
+       andl    %ebp,%esi
+       xorl    %eax,%ebp
+       psrldq  $4,%xmm10
+       xorl    %eax,%esi
+       addl    %ecx,%ebx
+       pxor    %xmm2,%xmm6
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       pxor    %xmm4,%xmm10
+       addl    36(%rsp),%eax
+       xorl    %ebp,%edx
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       pxor    %xmm10,%xmm6
+       andl    %edx,%edi
+       xorl    %ebp,%edx
+       movdqa  %xmm8,16(%rsp)
+       xorl    %ebp,%edi
+       addl    %ebx,%eax
+       movdqa  %xmm6,%xmm9
+       movdqa  %xmm6,%xmm10
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       addl    40(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  128(%r15),%xmm15
+       xorl    %edx,%ecx
+       pslldq  $12,%xmm9
+       paddd   %xmm6,%xmm6
+       movl    %eax,%edi
+       roll    $5,%eax
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       psrld   $31,%xmm10
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       movdqa  %xmm9,%xmm8
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       psrld   $30,%xmm9
+       por     %xmm10,%xmm6
+       addl    44(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       pslld   $2,%xmm8
+       pxor    %xmm9,%xmm6
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       movdqa  16(%r11),%xmm9
+       xorl    %ecx,%edi
+.byte  102,69,15,56,220,223
+       movups  144(%r15),%xmm14
+       addl    %ebp,%edx
+       pxor    %xmm8,%xmm6
+       rorl    $7,%eax
+       addl    %edi,%edx
+       movdqa  %xmm4,%xmm7
+       addl    48(%rsp),%ecx
+       xorl    %ebx,%eax
+       movdqa  %xmm6,%xmm8
+.byte  102,15,58,15,251,8
+       movl    %edx,%edi
+       roll    $5,%edx
+       paddd   %xmm6,%xmm9
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       psrldq  $4,%xmm8
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       pxor    %xmm3,%xmm7
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       pxor    %xmm5,%xmm8
+       addl    52(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%esi
+       roll    $5,%ecx
+.byte  102,69,15,56,220,222
+       movups  160(%r15),%xmm15
+       pxor    %xmm8,%xmm7
+       andl    %ebp,%edi
+       xorl    %eax,%ebp
+       movdqa  %xmm9,32(%rsp)
+       xorl    %eax,%edi
+       addl    %ecx,%ebx
+       movdqa  %xmm7,%xmm10
+       movdqa  %xmm7,%xmm8
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       addl    56(%rsp),%eax
+       xorl    %ebp,%edx
+       pslldq  $12,%xmm10
+       paddd   %xmm7,%xmm7
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       andl    %edx,%esi
+       xorl    %ebp,%edx
+       psrld   $31,%xmm8
+       xorl    %ebp,%esi
+       addl    %ebx,%eax
+       movdqa  %xmm10,%xmm9
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       psrld   $30,%xmm10
+       por     %xmm8,%xmm7
+       addl    60(%rsp),%ebp
+       cmpl    $11,%r8d
+       jb      .Laesenclast1
+       movups  176(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  192(%r15),%xmm15
+.byte  102,69,15,56,220,222
+       je      .Laesenclast1
+       movups  208(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  224(%r15),%xmm15
+.byte  102,69,15,56,220,222
+.Laesenclast1:
+.byte  102,69,15,56,221,223
+       movups  16(%r15),%xmm14
+       xorl    %edx,%ecx
+       movl    %eax,%esi
+       roll    $5,%eax
+       pslld   $2,%xmm9
+       pxor    %xmm10,%xmm7
+       andl    %ecx,%edi
+       xorl    %edx,%ecx
+       movdqa  16(%r11),%xmm10
+       xorl    %edx,%edi
+       addl    %eax,%ebp
+       pxor    %xmm9,%xmm7
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       movdqa  %xmm7,%xmm9
+       addl    0(%rsp),%edx
+       pxor    %xmm4,%xmm0
+.byte  102,68,15,58,15,206,8
+       xorl    %ecx,%ebx
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       pxor    %xmm1,%xmm0
+       andl    %ebx,%esi
+       xorl    %ecx,%ebx
+       movdqa  %xmm10,%xmm8
+       paddd   %xmm7,%xmm10
+       xorl    %ecx,%esi
+       movups  16(%r12),%xmm12
+       xorps   %xmm13,%xmm12
+       movups  %xmm11,0(%r13,%r12,1)
+       xorps   %xmm12,%xmm11
+.byte  102,69,15,56,220,222
+       movups  32(%r15),%xmm15
+       addl    %ebp,%edx
+       pxor    %xmm9,%xmm0
+       rorl    $7,%eax
+       addl    %esi,%edx
+       addl    4(%rsp),%ecx
+       xorl    %ebx,%eax
+       movdqa  %xmm0,%xmm9
+       movdqa  %xmm10,48(%rsp)
+       movl    %edx,%esi
+       roll    $5,%edx
+       andl    %eax,%edi
+       xorl    %ebx,%eax
+       pslld   $2,%xmm0
+       xorl    %ebx,%edi
+       addl    %edx,%ecx
+       psrld   $30,%xmm9
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       addl    8(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%edi
+       roll    $5,%ecx
+.byte  102,69,15,56,220,223
+       movups  48(%r15),%xmm14
+       por     %xmm9,%xmm0
+       andl    %ebp,%esi
+       xorl    %eax,%ebp
+       movdqa  %xmm0,%xmm10
+       xorl    %eax,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       addl    12(%rsp),%eax
+       xorl    %ebp,%edx
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       andl    %edx,%edi
+       xorl    %ebp,%edx
+       xorl    %ebp,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       addl    16(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  64(%r15),%xmm15
+       pxor    %xmm5,%xmm1
+.byte  102,68,15,58,15,215,8
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       roll    $5,%eax
+       pxor    %xmm2,%xmm1
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       movdqa  %xmm8,%xmm9
+       paddd   %xmm0,%xmm8
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       pxor    %xmm10,%xmm1
+       addl    20(%rsp),%edx
+       xorl    %ecx,%edi
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       movdqa  %xmm1,%xmm10
+       movdqa  %xmm8,0(%rsp)
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %edi,%edx
+       pslld   $2,%xmm1
+       addl    24(%rsp),%ecx
+       xorl    %ebx,%esi
+       psrld   $30,%xmm10
+       movl    %edx,%edi
+       roll    $5,%edx
+       xorl    %eax,%esi
+.byte  102,69,15,56,220,223
+       movups  80(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       por     %xmm10,%xmm1
+       addl    28(%rsp),%ebx
+       xorl    %eax,%edi
+       movdqa  %xmm1,%xmm8
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       addl    32(%rsp),%eax
+       pxor    %xmm6,%xmm2
+.byte  102,68,15,58,15,192,8
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       pxor    %xmm3,%xmm2
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       movdqa  32(%r11),%xmm10
+       paddd   %xmm1,%xmm9
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       pxor    %xmm8,%xmm2
+       addl    36(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  96(%r15),%xmm15
+       xorl    %edx,%edi
+       movl    %eax,%esi
+       roll    $5,%eax
+       movdqa  %xmm2,%xmm8
+       movdqa  %xmm9,16(%rsp)
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       pslld   $2,%xmm2
+       addl    40(%rsp),%edx
+       xorl    %ecx,%esi
+       psrld   $30,%xmm8
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %esi,%edx
+       por     %xmm8,%xmm2
+       addl    44(%rsp),%ecx
+       xorl    %ebx,%edi
+       movdqa  %xmm2,%xmm9
+       movl    %edx,%esi
+       roll    $5,%edx
+       xorl    %eax,%edi
+.byte  102,69,15,56,220,223
+       movups  112(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       addl    48(%rsp),%ebx
+       pxor    %xmm7,%xmm3
+.byte  102,68,15,58,15,201,8
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       pxor    %xmm4,%xmm3
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       movdqa  %xmm10,%xmm8
+       paddd   %xmm2,%xmm10
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       pxor    %xmm9,%xmm3
+       addl    52(%rsp),%eax
+       xorl    %ebp,%edi
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       movdqa  %xmm3,%xmm9
+       movdqa  %xmm10,32(%rsp)
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       pslld   $2,%xmm3
+       addl    56(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  128(%r15),%xmm15
+       xorl    %edx,%esi
+       psrld   $30,%xmm9
+       movl    %eax,%edi
+       roll    $5,%eax
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       por     %xmm9,%xmm3
+       addl    60(%rsp),%edx
+       xorl    %ecx,%edi
+       movdqa  %xmm3,%xmm10
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %edi,%edx
+       addl    0(%rsp),%ecx
+       pxor    %xmm0,%xmm4
+.byte  102,68,15,58,15,210,8
+       xorl    %ebx,%esi
+       movl    %edx,%edi
+       roll    $5,%edx
+       pxor    %xmm5,%xmm4
+       xorl    %eax,%esi
+.byte  102,69,15,56,220,223
+       movups  144(%r15),%xmm14
+       addl    %edx,%ecx
+       movdqa  %xmm8,%xmm9
+       paddd   %xmm3,%xmm8
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       pxor    %xmm10,%xmm4
+       addl    4(%rsp),%ebx
+       xorl    %eax,%edi
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       movdqa  %xmm4,%xmm10
+       movdqa  %xmm8,48(%rsp)
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       pslld   $2,%xmm4
+       addl    8(%rsp),%eax
+       xorl    %ebp,%esi
+       psrld   $30,%xmm10
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       por     %xmm10,%xmm4
+       addl    12(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  160(%r15),%xmm15
+       xorl    %edx,%edi
+       movdqa  %xmm4,%xmm8
+       movl    %eax,%esi
+       roll    $5,%eax
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       addl    16(%rsp),%edx
+       pxor    %xmm1,%xmm5
+.byte  102,68,15,58,15,195,8
+       xorl    %ecx,%esi
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       pxor    %xmm6,%xmm5
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       movdqa  %xmm9,%xmm10
+       paddd   %xmm4,%xmm9
+       rorl    $7,%eax
+       addl    %esi,%edx
+       pxor    %xmm8,%xmm5
+       addl    20(%rsp),%ecx
+       xorl    %ebx,%edi
+       movl    %edx,%esi
+       roll    $5,%edx
+       movdqa  %xmm5,%xmm8
+       movdqa  %xmm9,0(%rsp)
+       xorl    %eax,%edi
+       cmpl    $11,%r8d
+       jb      .Laesenclast2
+       movups  176(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  192(%r15),%xmm15
+.byte  102,69,15,56,220,222
+       je      .Laesenclast2
+       movups  208(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  224(%r15),%xmm15
+.byte  102,69,15,56,220,222
+.Laesenclast2:
+.byte  102,69,15,56,221,223
+       movups  16(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       pslld   $2,%xmm5
+       addl    24(%rsp),%ebx
+       xorl    %eax,%esi
+       psrld   $30,%xmm8
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       por     %xmm8,%xmm5
+       addl    28(%rsp),%eax
+       xorl    %ebp,%edi
+       movdqa  %xmm5,%xmm9
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       movl    %ecx,%edi
+       movups  32(%r12),%xmm12
+       xorps   %xmm13,%xmm12
+       movups  %xmm11,16(%r13,%r12,1)
+       xorps   %xmm12,%xmm11
+.byte  102,69,15,56,220,222
+       movups  32(%r15),%xmm15
+       pxor    %xmm2,%xmm6
+.byte  102,68,15,58,15,204,8
+       xorl    %edx,%ecx
+       addl    32(%rsp),%ebp
+       andl    %edx,%edi
+       pxor    %xmm7,%xmm6
+       andl    %ecx,%esi
+       rorl    $7,%ebx
+       movdqa  %xmm10,%xmm8
+       paddd   %xmm5,%xmm10
+       addl    %edi,%ebp
+       movl    %eax,%edi
+       pxor    %xmm9,%xmm6
+       roll    $5,%eax
+       addl    %esi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       movdqa  %xmm6,%xmm9
+       movdqa  %xmm10,16(%rsp)
+       movl    %ebx,%esi
+       xorl    %ecx,%ebx
+       addl    36(%rsp),%edx
+       andl    %ecx,%esi
+       pslld   $2,%xmm6
+       andl    %ebx,%edi
+       rorl    $7,%eax
+       psrld   $30,%xmm9
+       addl    %esi,%edx
+       movl    %ebp,%esi
+       roll    $5,%ebp
+.byte  102,69,15,56,220,223
+       movups  48(%r15),%xmm14
+       addl    %edi,%edx
+       xorl    %ecx,%ebx
+       addl    %ebp,%edx
+       por     %xmm9,%xmm6
+       movl    %eax,%edi
+       xorl    %ebx,%eax
+       movdqa  %xmm6,%xmm10
+       addl    40(%rsp),%ecx
+       andl    %ebx,%edi
+       andl    %eax,%esi
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       movl    %edx,%edi
+       roll    $5,%edx
+       addl    %esi,%ecx
+       xorl    %ebx,%eax
+       addl    %edx,%ecx
+       movl    %ebp,%esi
+       xorl    %eax,%ebp
+       addl    44(%rsp),%ebx
+       andl    %eax,%esi
+       andl    %ebp,%edi
+.byte  102,69,15,56,220,222
+       movups  64(%r15),%xmm15
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       addl    %edi,%ebx
+       xorl    %eax,%ebp
+       addl    %ecx,%ebx
+       movl    %edx,%edi
+       pxor    %xmm3,%xmm7
+.byte  102,68,15,58,15,213,8
+       xorl    %ebp,%edx
+       addl    48(%rsp),%eax
+       andl    %ebp,%edi
+       pxor    %xmm0,%xmm7
+       andl    %edx,%esi
+       rorl    $7,%ecx
+       movdqa  48(%r11),%xmm9
+       paddd   %xmm6,%xmm8
+       addl    %edi,%eax
+       movl    %ebx,%edi
+       pxor    %xmm10,%xmm7
+       roll    $5,%ebx
+       addl    %esi,%eax
+       xorl    %ebp,%edx
+       addl    %ebx,%eax
+       movdqa  %xmm7,%xmm10
+       movdqa  %xmm8,32(%rsp)
+       movl    %ecx,%esi
+.byte  102,69,15,56,220,223
+       movups  80(%r15),%xmm14
+       xorl    %edx,%ecx
+       addl    52(%rsp),%ebp
+       andl    %edx,%esi
+       pslld   $2,%xmm7
+       andl    %ecx,%edi
+       rorl    $7,%ebx
+       psrld   $30,%xmm10
+       addl    %esi,%ebp
+       movl    %eax,%esi
+       roll    $5,%eax
+       addl    %edi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       por     %xmm10,%xmm7
+       movl    %ebx,%edi
+       xorl    %ecx,%ebx
+       movdqa  %xmm7,%xmm8
+       addl    56(%rsp),%edx
+       andl    %ecx,%edi
+       andl    %ebx,%esi
+       rorl    $7,%eax
+       addl    %edi,%edx
+       movl    %ebp,%edi
+       roll    $5,%ebp
+.byte  102,69,15,56,220,222
+       movups  96(%r15),%xmm15
+       addl    %esi,%edx
+       xorl    %ecx,%ebx
+       addl    %ebp,%edx
+       movl    %eax,%esi
+       xorl    %ebx,%eax
+       addl    60(%rsp),%ecx
+       andl    %ebx,%esi
+       andl    %eax,%edi
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       movl    %edx,%esi
+       roll    $5,%edx
+       addl    %edi,%ecx
+       xorl    %ebx,%eax
+       addl    %edx,%ecx
+       movl    %ebp,%edi
+       pxor    %xmm4,%xmm0
+.byte  102,68,15,58,15,198,8
+       xorl    %eax,%ebp
+       addl    0(%rsp),%ebx
+       andl    %eax,%edi
+       pxor    %xmm1,%xmm0
+       andl    %ebp,%esi
+.byte  102,69,15,56,220,223
+       movups  112(%r15),%xmm14
+       rorl    $7,%edx
+       movdqa  %xmm9,%xmm10
+       paddd   %xmm7,%xmm9
+       addl    %edi,%ebx
+       movl    %ecx,%edi
+       pxor    %xmm8,%xmm0
+       roll    $5,%ecx
+       addl    %esi,%ebx
+       xorl    %eax,%ebp
+       addl    %ecx,%ebx
+       movdqa  %xmm0,%xmm8
+       movdqa  %xmm9,48(%rsp)
+       movl    %edx,%esi
+       xorl    %ebp,%edx
+       addl    4(%rsp),%eax
+       andl    %ebp,%esi
+       pslld   $2,%xmm0
+       andl    %edx,%edi
+       rorl    $7,%ecx
+       psrld   $30,%xmm8
+       addl    %esi,%eax
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       addl    %edi,%eax
+       xorl    %ebp,%edx
+       addl    %ebx,%eax
+       por     %xmm8,%xmm0
+       movl    %ecx,%edi
+.byte  102,69,15,56,220,222
+       movups  128(%r15),%xmm15
+       xorl    %edx,%ecx
+       movdqa  %xmm0,%xmm9
+       addl    8(%rsp),%ebp
+       andl    %edx,%edi
+       andl    %ecx,%esi
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       movl    %eax,%edi
+       roll    $5,%eax
+       addl    %esi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       movl    %ebx,%esi
+       xorl    %ecx,%ebx
+       addl    12(%rsp),%edx
+       andl    %ecx,%esi
+       andl    %ebx,%edi
+       rorl    $7,%eax
+       addl    %esi,%edx
+       movl    %ebp,%esi
+       roll    $5,%ebp
+.byte  102,69,15,56,220,223
+       movups  144(%r15),%xmm14
+       addl    %edi,%edx
+       xorl    %ecx,%ebx
+       addl    %ebp,%edx
+       movl    %eax,%edi
+       pxor    %xmm5,%xmm1
+.byte  102,68,15,58,15,207,8
+       xorl    %ebx,%eax
+       addl    16(%rsp),%ecx
+       andl    %ebx,%edi
+       pxor    %xmm2,%xmm1
+       andl    %eax,%esi
+       rorl    $7,%ebp
+       movdqa  %xmm10,%xmm8
+       paddd   %xmm0,%xmm10
+       addl    %edi,%ecx
+       movl    %edx,%edi
+       pxor    %xmm9,%xmm1
+       roll    $5,%edx
+       addl    %esi,%ecx
+       xorl    %ebx,%eax
+       addl    %edx,%ecx
+       movdqa  %xmm1,%xmm9
+       movdqa  %xmm10,0(%rsp)
+       movl    %ebp,%esi
+       xorl    %eax,%ebp
+       addl    20(%rsp),%ebx
+       andl    %eax,%esi
+       pslld   $2,%xmm1
+       andl    %ebp,%edi
+.byte  102,69,15,56,220,222
+       movups  160(%r15),%xmm15
+       rorl    $7,%edx
+       psrld   $30,%xmm9
+       addl    %esi,%ebx
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       addl    %edi,%ebx
+       xorl    %eax,%ebp
+       addl    %ecx,%ebx
+       por     %xmm9,%xmm1
+       movl    %edx,%edi
+       xorl    %ebp,%edx
+       movdqa  %xmm1,%xmm10
+       addl    24(%rsp),%eax
+       andl    %ebp,%edi
+       andl    %edx,%esi
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       addl    %esi,%eax
+       xorl    %ebp,%edx
+       addl    %ebx,%eax
+       movl    %ecx,%esi
+       cmpl    $11,%r8d
+       jb      .Laesenclast3
+       movups  176(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  192(%r15),%xmm15
+.byte  102,69,15,56,220,222
+       je      .Laesenclast3
+       movups  208(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  224(%r15),%xmm15
+.byte  102,69,15,56,220,222
+.Laesenclast3:
+.byte  102,69,15,56,221,223
+       movups  16(%r15),%xmm14
+       xorl    %edx,%ecx
+       addl    28(%rsp),%ebp
+       andl    %edx,%esi
+       andl    %ecx,%edi
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       movl    %eax,%esi
+       roll    $5,%eax
+       addl    %edi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       movl    %ebx,%edi
+       pxor    %xmm6,%xmm2
+.byte  102,68,15,58,15,208,8
+       xorl    %ecx,%ebx
+       addl    32(%rsp),%edx
+       andl    %ecx,%edi
+       pxor    %xmm3,%xmm2
+       andl    %ebx,%esi
+       rorl    $7,%eax
+       movdqa  %xmm8,%xmm9
+       paddd   %xmm1,%xmm8
+       addl    %edi,%edx
+       movl    %ebp,%edi
+       pxor    %xmm10,%xmm2
+       roll    $5,%ebp
+       movups  48(%r12),%xmm12
+       xorps   %xmm13,%xmm12
+       movups  %xmm11,32(%r13,%r12,1)
+       xorps   %xmm12,%xmm11
+.byte  102,69,15,56,220,222
+       movups  32(%r15),%xmm15
+       addl    %esi,%edx
+       xorl    %ecx,%ebx
+       addl    %ebp,%edx
+       movdqa  %xmm2,%xmm10
+       movdqa  %xmm8,16(%rsp)
+       movl    %eax,%esi
+       xorl    %ebx,%eax
+       addl    36(%rsp),%ecx
+       andl    %ebx,%esi
+       pslld   $2,%xmm2
+       andl    %eax,%edi
+       rorl    $7,%ebp
+       psrld   $30,%xmm10
+       addl    %esi,%ecx
+       movl    %edx,%esi
+       roll    $5,%edx
+       addl    %edi,%ecx
+       xorl    %ebx,%eax
+       addl    %edx,%ecx
+       por     %xmm10,%xmm2
+       movl    %ebp,%edi
+       xorl    %eax,%ebp
+       movdqa  %xmm2,%xmm8
+       addl    40(%rsp),%ebx
+       andl    %eax,%edi
+       andl    %ebp,%esi
+.byte  102,69,15,56,220,223
+       movups  48(%r15),%xmm14
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       addl    %esi,%ebx
+       xorl    %eax,%ebp
+       addl    %ecx,%ebx
+       movl    %edx,%esi
+       xorl    %ebp,%edx
+       addl    44(%rsp),%eax
+       andl    %ebp,%esi
+       andl    %edx,%edi
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       addl    %edi,%eax
+       xorl    %ebp,%edx
+       addl    %ebx,%eax
+       addl    48(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  64(%r15),%xmm15
+       pxor    %xmm7,%xmm3
+.byte  102,68,15,58,15,193,8
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       roll    $5,%eax
+       pxor    %xmm4,%xmm3
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       movdqa  %xmm9,%xmm10
+       paddd   %xmm2,%xmm9
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       pxor    %xmm8,%xmm3
+       addl    52(%rsp),%edx
+       xorl    %ecx,%edi
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       movdqa  %xmm3,%xmm8
+       movdqa  %xmm9,32(%rsp)
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %edi,%edx
+       pslld   $2,%xmm3
+       addl    56(%rsp),%ecx
+       xorl    %ebx,%esi
+       psrld   $30,%xmm8
+       movl    %edx,%edi
+       roll    $5,%edx
+       xorl    %eax,%esi
+.byte  102,69,15,56,220,223
+       movups  80(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       por     %xmm8,%xmm3
+       addl    60(%rsp),%ebx
+       xorl    %eax,%edi
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       addl    0(%rsp),%eax
+       paddd   %xmm3,%xmm10
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       xorl    %edx,%esi
+       movdqa  %xmm10,48(%rsp)
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       addl    4(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  96(%r15),%xmm15
+       xorl    %edx,%edi
+       movl    %eax,%esi
+       roll    $5,%eax
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       addl    8(%rsp),%edx
+       xorl    %ecx,%esi
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %esi,%edx
+       addl    12(%rsp),%ecx
+       xorl    %ebx,%edi
+       movl    %edx,%esi
+       roll    $5,%edx
+       xorl    %eax,%edi
+.byte  102,69,15,56,220,223
+       movups  112(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       cmpq    %r14,%r10
+       je      .Ldone_ssse3
+       movdqa  64(%r11),%xmm6
+       movdqa  0(%r11),%xmm9
+       movdqu  0(%r10),%xmm0
+       movdqu  16(%r10),%xmm1
+       movdqu  32(%r10),%xmm2
+       movdqu  48(%r10),%xmm3
+.byte  102,15,56,0,198
+       addq    $64,%r10
+       addl    16(%rsp),%ebx
+       xorl    %eax,%esi
+.byte  102,15,56,0,206
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       paddd   %xmm9,%xmm0
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       movdqa  %xmm0,0(%rsp)
+       addl    20(%rsp),%eax
+       xorl    %ebp,%edi
+       psubd   %xmm9,%xmm0
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       addl    24(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  128(%r15),%xmm15
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       roll    $5,%eax
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       addl    28(%rsp),%edx
+       xorl    %ecx,%edi
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %edi,%edx
+       addl    32(%rsp),%ecx
+       xorl    %ebx,%esi
+.byte  102,15,56,0,214
+       movl    %edx,%edi
+       roll    $5,%edx
+       paddd   %xmm9,%xmm1
+       xorl    %eax,%esi
+.byte  102,69,15,56,220,223
+       movups  144(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       movdqa  %xmm1,16(%rsp)
+       addl    36(%rsp),%ebx
+       xorl    %eax,%edi
+       psubd   %xmm9,%xmm1
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       addl    40(%rsp),%eax
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       addl    44(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  160(%r15),%xmm15
+       xorl    %edx,%edi
+       movl    %eax,%esi
+       roll    $5,%eax
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       addl    48(%rsp),%edx
+       xorl    %ecx,%esi
+.byte  102,15,56,0,222
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       paddd   %xmm9,%xmm2
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %esi,%edx
+       movdqa  %xmm2,32(%rsp)
+       addl    52(%rsp),%ecx
+       xorl    %ebx,%edi
+       psubd   %xmm9,%xmm2
+       movl    %edx,%esi
+       roll    $5,%edx
+       xorl    %eax,%edi
+       cmpl    $11,%r8d
+       jb      .Laesenclast4
+       movups  176(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  192(%r15),%xmm15
+.byte  102,69,15,56,220,222
+       je      .Laesenclast4
+       movups  208(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  224(%r15),%xmm15
+.byte  102,69,15,56,220,222
+.Laesenclast4:
+.byte  102,69,15,56,221,223
+       movups  16(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       addl    56(%rsp),%ebx
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       addl    60(%rsp),%eax
+       xorl    %ebp,%edi
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       movups  %xmm11,48(%r13,%r12,1)
+       leaq    64(%r12),%r12
+
+       addl    0(%r9),%eax
+       addl    4(%r9),%esi
+       addl    8(%r9),%ecx
+       addl    12(%r9),%edx
+       movl    %eax,0(%r9)
+       addl    16(%r9),%ebp
+       movl    %esi,4(%r9)
+       movl    %esi,%ebx
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+       movl    %ebp,16(%r9)
+       jmp     .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+       addl    16(%rsp),%ebx
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       addl    20(%rsp),%eax
+       xorl    %ebp,%edi
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       addl    24(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  128(%r15),%xmm15
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       roll    $5,%eax
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %esi,%ebp
+       addl    28(%rsp),%edx
+       xorl    %ecx,%edi
+       movl    %ebp,%esi
+       roll    $5,%ebp
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %edi,%edx
+       addl    32(%rsp),%ecx
+       xorl    %ebx,%esi
+       movl    %edx,%edi
+       roll    $5,%edx
+       xorl    %eax,%esi
+.byte  102,69,15,56,220,223
+       movups  144(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %esi,%ecx
+       addl    36(%rsp),%ebx
+       xorl    %eax,%edi
+       movl    %ecx,%esi
+       roll    $5,%ecx
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %edi,%ebx
+       addl    40(%rsp),%eax
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       roll    $5,%ebx
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %esi,%eax
+       addl    44(%rsp),%ebp
+.byte  102,69,15,56,220,222
+       movups  160(%r15),%xmm15
+       xorl    %edx,%edi
+       movl    %eax,%esi
+       roll    $5,%eax
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       rorl    $7,%ebx
+       addl    %edi,%ebp
+       addl    48(%rsp),%edx
+       xorl    %ecx,%esi
+       movl    %ebp,%edi
+       roll    $5,%ebp
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       rorl    $7,%eax
+       addl    %esi,%edx
+       addl    52(%rsp),%ecx
+       xorl    %ebx,%edi
+       movl    %edx,%esi
+       roll    $5,%edx
+       xorl    %eax,%edi
+       cmpl    $11,%r8d
+       jb      .Laesenclast5
+       movups  176(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  192(%r15),%xmm15
+.byte  102,69,15,56,220,222
+       je      .Laesenclast5
+       movups  208(%r15),%xmm14
+.byte  102,69,15,56,220,223
+       movups  224(%r15),%xmm15
+.byte  102,69,15,56,220,222
+.Laesenclast5:
+.byte  102,69,15,56,221,223
+       movups  16(%r15),%xmm14
+       addl    %edx,%ecx
+       rorl    $7,%ebp
+       addl    %edi,%ecx
+       addl    56(%rsp),%ebx
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       roll    $5,%ecx
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       rorl    $7,%edx
+       addl    %esi,%ebx
+       addl    60(%rsp),%eax
+       xorl    %ebp,%edi
+       movl    %ebx,%esi
+       roll    $5,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       rorl    $7,%ecx
+       addl    %edi,%eax
+       movups  %xmm11,48(%r13,%r12,1)
+       movq    88(%rsp),%r8
+
+       addl    0(%r9),%eax
+       addl    4(%r9),%esi
+       addl    8(%r9),%ecx
+       movl    %eax,0(%r9)
+       addl    12(%r9),%edx
+       movl    %esi,4(%r9)
+       addl    16(%r9),%ebp
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+       movl    %ebp,16(%r9)
+       movups  %xmm11,(%r8)
+       leaq    104(%rsp),%rsi
+       movq    0(%rsi),%r15
+       movq    8(%rsi),%r14
+       movq    16(%rsi),%r13
+       movq    24(%rsi),%r12
+       movq    32(%rsi),%rbp
+       movq    40(%rsi),%rbx
+       leaq    48(%rsi),%rsp
+.Lepilogue_ssse3:
+       .byte   0xf3,0xc3
+.size  aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64
+K_XX_XX:
+.long  0x5a827999,0x5a827999,0x5a827999,0x5a827999     
+.long  0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     
+.long  0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     
+.long  0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     
+.long  0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     
+
+.byte  65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/asm/aesni-x86.s b/secure/lib/libcrypto/asm/aesni-x86.s
new file mode 100644 (file)
index 0000000..4ceefd4
--- /dev/null
+++ b/secure/lib/libcrypto/asm/aesni-x86.s
@@ -0,0 +1,2143 @@
+.file  "../../../../crypto/openssl/crypto/aes/asm/aesni-x86.s"
+.text
+.globl aesni_encrypt
+.type  aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+.L_aesni_encrypt_begin:
+       movl    4(%esp),%eax
+       movl    12(%esp),%edx
+       movups  (%eax),%xmm2
+       movl    240(%edx),%ecx
+       movl    8(%esp),%eax
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L000enc1_loop_1:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L000enc1_loop_1
+.byte  102,15,56,221,209
+       movups  %xmm2,(%eax)
+       ret
+.size  aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl aesni_decrypt
+.type  aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+.L_aesni_decrypt_begin:
+       movl    4(%esp),%eax
+       movl    12(%esp),%edx
+       movups  (%eax),%xmm2
+       movl    240(%edx),%ecx
+       movl    8(%esp),%eax
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L001dec1_loop_2:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L001dec1_loop_2
+.byte  102,15,56,223,209
+       movups  %xmm2,(%eax)
+       ret
+.size  aesni_decrypt,.-.L_aesni_decrypt_begin
+.type  _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+       movups  (%edx),%xmm0
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       pxor    %xmm0,%xmm4
+       movups  (%edx),%xmm0
+.L002enc3_loop:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %ecx
+.byte  102,15,56,220,225
+       movups  16(%edx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leal    32(%edx),%edx
+.byte  102,15,56,220,224
+       movups  (%edx),%xmm0
+       jnz     .L002enc3_loop
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+       ret
+.size  _aesni_encrypt3,.-_aesni_encrypt3
+.type  _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+       movups  (%edx),%xmm0
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       pxor    %xmm0,%xmm4
+       movups  (%edx),%xmm0
+.L003dec3_loop:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %ecx
+.byte  102,15,56,222,225
+       movups  16(%edx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leal    32(%edx),%edx
+.byte  102,15,56,222,224
+       movups  (%edx),%xmm0
+       jnz     .L003dec3_loop
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+       ret
+.size  _aesni_decrypt3,.-_aesni_decrypt3
+.type  _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       shrl    $1,%ecx
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       pxor    %xmm0,%xmm4
+       pxor    %xmm0,%xmm5
+       movups  (%edx),%xmm0
+.L004enc4_loop:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %ecx
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+       movups  16(%edx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leal    32(%edx),%edx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+       movups  (%edx),%xmm0
+       jnz     .L004enc4_loop
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+.byte  102,15,56,221,232
+       ret
+.size  _aesni_encrypt4,.-_aesni_encrypt4
+.type  _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       shrl    $1,%ecx
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       pxor    %xmm0,%xmm4
+       pxor    %xmm0,%xmm5
+       movups  (%edx),%xmm0
+.L005dec4_loop:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %ecx
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+       movups  16(%edx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leal    32(%edx),%edx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+       movups  (%edx),%xmm0
+       jnz     .L005dec4_loop
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+.byte  102,15,56,223,232
+       ret
+.size  _aesni_decrypt4,.-_aesni_decrypt4
+.type  _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+       movups  (%edx),%xmm0
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,220,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,220,217
+       pxor    %xmm0,%xmm5
+       decl    %ecx
+.byte  102,15,56,220,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+.byte  102,15,56,220,241
+       movups  (%edx),%xmm0
+.byte  102,15,56,220,249
+       jmp     .L_aesni_encrypt6_enter
+.align 16
+.L006enc6_loop:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %ecx
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.align 16
+.L_aesni_encrypt6_enter:
+       movups  16(%edx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leal    32(%edx),%edx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+       movups  (%edx),%xmm0
+       jnz     .L006enc6_loop
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+.byte  102,15,56,221,232
+.byte  102,15,56,221,240
+.byte  102,15,56,221,248
+       ret
+.size  _aesni_encrypt6,.-_aesni_encrypt6
+.type  _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+       movups  (%edx),%xmm0
+       shrl    $1,%ecx
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,222,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,222,217
+       pxor    %xmm0,%xmm5
+       decl    %ecx
+.byte  102,15,56,222,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+.byte  102,15,56,222,241
+       movups  (%edx),%xmm0
+.byte  102,15,56,222,249
+       jmp     .L_aesni_decrypt6_enter
+.align 16
+.L007dec6_loop:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %ecx
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.align 16
+.L_aesni_decrypt6_enter:
+       movups  16(%edx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leal    32(%edx),%edx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+.byte  102,15,56,222,240
+.byte  102,15,56,222,248
+       movups  (%edx),%xmm0
+       jnz     .L007dec6_loop
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+.byte  102,15,56,223,232
+.byte  102,15,56,223,240
+.byte  102,15,56,223,248
+       ret
+.size  _aesni_decrypt6,.-_aesni_decrypt6
+.globl aesni_ecb_encrypt
+.type  aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    36(%esp),%ebx
+       andl    $-16,%eax
+       jz      .L008ecb_ret
+       movl    240(%edx),%ecx
+       testl   %ebx,%ebx
+       jz      .L009ecb_decrypt
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       cmpl    $96,%eax
+       jb      .L010ecb_enc_tail
+       movdqu  (%esi),%xmm2
+       movdqu  16(%esi),%xmm3
+       movdqu  32(%esi),%xmm4
+       movdqu  48(%esi),%xmm5
+       movdqu  64(%esi),%xmm6
+       movdqu  80(%esi),%xmm7
+       leal    96(%esi),%esi
+       subl    $96,%eax
+       jmp     .L011ecb_enc_loop6_enter
+.align 16
+.L012ecb_enc_loop6:
+       movups  %xmm2,(%edi)
+       movdqu  (%esi),%xmm2
+       movups  %xmm3,16(%edi)
+       movdqu  16(%esi),%xmm3
+       movups  %xmm4,32(%edi)
+       movdqu  32(%esi),%xmm4
+       movups  %xmm5,48(%edi)
+       movdqu  48(%esi),%xmm5
+       movups  %xmm6,64(%edi)
+       movdqu  64(%esi),%xmm6
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       movdqu  80(%esi),%xmm7
+       leal    96(%esi),%esi
+.L011ecb_enc_loop6_enter:
+       call    _aesni_encrypt6
+       movl    %ebp,%edx
+       movl    %ebx,%ecx
+       subl    $96,%eax
+       jnc     .L012ecb_enc_loop6
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       addl    $96,%eax
+       jz      .L008ecb_ret
+.L010ecb_enc_tail:
+       movups  (%esi),%xmm2
+       cmpl    $32,%eax
+       jb      .L013ecb_enc_one
+       movups  16(%esi),%xmm3
+       je      .L014ecb_enc_two
+       movups  32(%esi),%xmm4
+       cmpl    $64,%eax
+       jb      .L015ecb_enc_three
+       movups  48(%esi),%xmm5
+       je      .L016ecb_enc_four
+       movups  64(%esi),%xmm6
+       xorps   %xmm7,%xmm7
+       call    _aesni_encrypt6
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L013ecb_enc_one:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L017enc1_loop_3:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L017enc1_loop_3
+.byte  102,15,56,221,209
+       movups  %xmm2,(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L014ecb_enc_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_encrypt3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L015ecb_enc_three:
+       call    _aesni_encrypt3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L016ecb_enc_four:
+       call    _aesni_encrypt4
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L009ecb_decrypt:
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       cmpl    $96,%eax
+       jb      .L018ecb_dec_tail
+       movdqu  (%esi),%xmm2
+       movdqu  16(%esi),%xmm3
+       movdqu  32(%esi),%xmm4
+       movdqu  48(%esi),%xmm5
+       movdqu  64(%esi),%xmm6
+       movdqu  80(%esi),%xmm7
+       leal    96(%esi),%esi
+       subl    $96,%eax
+       jmp     .L019ecb_dec_loop6_enter
+.align 16
+.L020ecb_dec_loop6:
+       movups  %xmm2,(%edi)
+       movdqu  (%esi),%xmm2
+       movups  %xmm3,16(%edi)
+       movdqu  16(%esi),%xmm3
+       movups  %xmm4,32(%edi)
+       movdqu  32(%esi),%xmm4
+       movups  %xmm5,48(%edi)
+       movdqu  48(%esi),%xmm5
+       movups  %xmm6,64(%edi)
+       movdqu  64(%esi),%xmm6
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       movdqu  80(%esi),%xmm7
+       leal    96(%esi),%esi
+.L019ecb_dec_loop6_enter:
+       call    _aesni_decrypt6
+       movl    %ebp,%edx
+       movl    %ebx,%ecx
+       subl    $96,%eax
+       jnc     .L020ecb_dec_loop6
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       addl    $96,%eax
+       jz      .L008ecb_ret
+.L018ecb_dec_tail:
+       movups  (%esi),%xmm2
+       cmpl    $32,%eax
+       jb      .L021ecb_dec_one
+       movups  16(%esi),%xmm3
+       je      .L022ecb_dec_two
+       movups  32(%esi),%xmm4
+       cmpl    $64,%eax
+       jb      .L023ecb_dec_three
+       movups  48(%esi),%xmm5
+       je      .L024ecb_dec_four
+       movups  64(%esi),%xmm6
+       xorps   %xmm7,%xmm7
+       call    _aesni_decrypt6
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L021ecb_dec_one:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L025dec1_loop_4:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L025dec1_loop_4
+.byte  102,15,56,223,209
+       movups  %xmm2,(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L022ecb_dec_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_decrypt3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L023ecb_dec_three:
+       call    _aesni_decrypt3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       jmp     .L008ecb_ret
+.align 16
+.L024ecb_dec_four:
+       call    _aesni_decrypt4
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+.L008ecb_ret:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl aesni_ccm64_encrypt_blocks
+.type  aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+.L_aesni_ccm64_encrypt_blocks_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    36(%esp),%ebx
+       movl    40(%esp),%ecx
+       movl    %esp,%ebp
+       subl    $60,%esp
+       andl    $-16,%esp
+       movl    %ebp,48(%esp)
+       movdqu  (%ebx),%xmm7
+       movdqu  (%ecx),%xmm3
+       movl    240(%edx),%ecx
+       movl    $202182159,(%esp)
+       movl    $134810123,4(%esp)
+       movl    $67438087,8(%esp)
+       movl    $66051,12(%esp)
+       movl    $1,%ebx
+       xorl    %ebp,%ebp
+       movl    %ebx,16(%esp)
+       movl    %ebp,20(%esp)
+       movl    %ebp,24(%esp)
+       movl    %ebp,28(%esp)
+       shrl    $1,%ecx
+       leal    (%edx),%ebp
+       movdqa  (%esp),%xmm5
+       movdqa  %xmm7,%xmm2
+       movl    %ecx,%ebx
+.byte  102,15,56,0,253
+.L026ccm64_enc_outer:
+       movups  (%ebp),%xmm0
+       movl    %ebx,%ecx
+       movups  (%esi),%xmm6
+       xorps   %xmm0,%xmm2
+       movups  16(%ebp),%xmm1
+       xorps   %xmm6,%xmm0
+       leal    32(%ebp),%edx
+       xorps   %xmm0,%xmm3
+       movups  (%edx),%xmm0
+.L027ccm64_enc2_loop:
+.byte  102,15,56,220,209
+       decl    %ecx
+.byte  102,15,56,220,217
+       movups  16(%edx),%xmm1
+.byte  102,15,56,220,208
+       leal    32(%edx),%edx
+.byte  102,15,56,220,216
+       movups  (%edx),%xmm0
+       jnz     .L027ccm64_enc2_loop
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       paddq   16(%esp),%xmm7
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+       decl    %eax
+       leal    16(%esi),%esi
+       xorps   %xmm2,%xmm6
+       movdqa  %xmm7,%xmm2
+       movups  %xmm6,(%edi)
+       leal    16(%edi),%edi
+.byte  102,15,56,0,213
+       jnz     .L026ccm64_enc_outer
+       movl    48(%esp),%esp
+       movl    40(%esp),%edi
+       movups  %xmm3,(%edi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
+.globl aesni_ccm64_decrypt_blocks
+.type  aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+.L_aesni_ccm64_decrypt_blocks_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    36(%esp),%ebx
+       movl    40(%esp),%ecx
+       movl    %esp,%ebp
+       subl    $60,%esp
+       andl    $-16,%esp
+       movl    %ebp,48(%esp)
+       movdqu  (%ebx),%xmm7
+       movdqu  (%ecx),%xmm3
+       movl    240(%edx),%ecx
+       movl    $202182159,(%esp)
+       movl    $134810123,4(%esp)
+       movl    $67438087,8(%esp)
+       movl    $66051,12(%esp)
+       movl    $1,%ebx
+       xorl    %ebp,%ebp
+       movl    %ebx,16(%esp)
+       movl    %ebp,20(%esp)
+       movl    %ebp,24(%esp)
+       movl    %ebp,28(%esp)
+       movdqa  (%esp),%xmm5
+       movdqa  %xmm7,%xmm2
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+.byte  102,15,56,0,253
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L028enc1_loop_5:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L028enc1_loop_5
+.byte  102,15,56,221,209
+       movups  (%esi),%xmm6
+       paddq   16(%esp),%xmm7
+       leal    16(%esi),%esi
+       jmp     .L029ccm64_dec_outer
+.align 16
+.L029ccm64_dec_outer:
+       xorps   %xmm2,%xmm6
+       movdqa  %xmm7,%xmm2
+       movl    %ebx,%ecx
+       movups  %xmm6,(%edi)
+       leal    16(%edi),%edi
+.byte  102,15,56,0,213
+       subl    $1,%eax
+       jz      .L030ccm64_dec_break
+       movups  (%ebp),%xmm0
+       shrl    $1,%ecx
+       movups  16(%ebp),%xmm1
+       xorps   %xmm0,%xmm6
+       leal    32(%ebp),%edx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm6,%xmm3
+       movups  (%edx),%xmm0
+.L031ccm64_dec2_loop:
+.byte  102,15,56,220,209
+       decl    %ecx
+.byte  102,15,56,220,217
+       movups  16(%edx),%xmm1
+.byte  102,15,56,220,208
+       leal    32(%edx),%edx
+.byte  102,15,56,220,216
+       movups  (%edx),%xmm0
+       jnz     .L031ccm64_dec2_loop
+       movups  (%esi),%xmm6
+       paddq   16(%esp),%xmm7
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       leal    16(%esi),%esi
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+       jmp     .L029ccm64_dec_outer
+.align 16
+.L030ccm64_dec_break:
+       movl    %ebp,%edx
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       xorps   %xmm0,%xmm6
+       leal    32(%edx),%edx
+       xorps   %xmm6,%xmm3
+.L032enc1_loop_6:
+.byte  102,15,56,220,217
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L032enc1_loop_6
+.byte  102,15,56,221,217
+       movl    48(%esp),%esp
+       movl    40(%esp),%edi
+       movups  %xmm3,(%edi)
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
+.globl aesni_ctr32_encrypt_blocks
+.type  aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+.L_aesni_ctr32_encrypt_blocks_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    36(%esp),%ebx
+       movl    %esp,%ebp
+       subl    $88,%esp
+       andl    $-16,%esp
+       movl    %ebp,80(%esp)
+       cmpl    $1,%eax
+       je      .L033ctr32_one_shortcut
+       movdqu  (%ebx),%xmm7
+       movl    $202182159,(%esp)
+       movl    $134810123,4(%esp)
+       movl    $67438087,8(%esp)
+       movl    $66051,12(%esp)
+       movl    $6,%ecx
+       xorl    %ebp,%ebp
+       movl    %ecx,16(%esp)
+       movl    %ecx,20(%esp)
+       movl    %ecx,24(%esp)
+       movl    %ebp,28(%esp)
+.byte  102,15,58,22,251,3
+.byte  102,15,58,34,253,3
+       movl    240(%edx),%ecx
+       bswap   %ebx
+       pxor    %xmm1,%xmm1
+       pxor    %xmm0,%xmm0
+       movdqa  (%esp),%xmm2
+.byte  102,15,58,34,203,0
+       leal    3(%ebx),%ebp
+.byte  102,15,58,34,197,0
+       incl    %ebx
+.byte  102,15,58,34,203,1
+       incl    %ebp
+.byte  102,15,58,34,197,1
+       incl    %ebx
+.byte  102,15,58,34,203,2
+       incl    %ebp
+.byte  102,15,58,34,197,2
+       movdqa  %xmm1,48(%esp)
+.byte  102,15,56,0,202
+       movdqa  %xmm0,64(%esp)
+.byte  102,15,56,0,194
+       pshufd  $192,%xmm1,%xmm2
+       pshufd  $128,%xmm1,%xmm3
+       cmpl    $6,%eax
+       jb      .L034ctr32_tail
+       movdqa  %xmm7,32(%esp)
+       shrl    $1,%ecx
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       subl    $6,%eax
+       jmp     .L035ctr32_loop6
+.align 16
+.L035ctr32_loop6:
+       pshufd  $64,%xmm1,%xmm4
+       movdqa  32(%esp),%xmm1
+       pshufd  $192,%xmm0,%xmm5
+       por     %xmm1,%xmm2
+       pshufd  $128,%xmm0,%xmm6
+       por     %xmm1,%xmm3
+       pshufd  $64,%xmm0,%xmm7
+       por     %xmm1,%xmm4
+       por     %xmm1,%xmm5
+       por     %xmm1,%xmm6
+       por     %xmm1,%xmm7
+       movups  (%ebp),%xmm0
+       movups  16(%ebp),%xmm1
+       leal    32(%ebp),%edx
+       decl    %ecx
+       pxor    %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,220,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,220,217
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,220,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+.byte  102,15,56,220,241
+       movups  (%edx),%xmm0
+.byte  102,15,56,220,249
+       call    .L_aesni_encrypt6_enter
+       movups  (%esi),%xmm1
+       movups  16(%esi),%xmm0
+       xorps   %xmm1,%xmm2
+       movups  32(%esi),%xmm1
+       xorps   %xmm0,%xmm3
+       movups  %xmm2,(%edi)
+       movdqa  16(%esp),%xmm0
+       xorps   %xmm1,%xmm4
+       movdqa  48(%esp),%xmm1
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       paddd   %xmm0,%xmm1
+       paddd   64(%esp),%xmm0
+       movdqa  (%esp),%xmm2
+       movups  48(%esi),%xmm3
+       movups  64(%esi),%xmm4
+       xorps   %xmm3,%xmm5
+       movups  80(%esi),%xmm3
+       leal    96(%esi),%esi
+       movdqa  %xmm1,48(%esp)
+.byte  102,15,56,0,202
+       xorps   %xmm4,%xmm6
+       movups  %xmm5,48(%edi)
+       xorps   %xmm3,%xmm7
+       movdqa  %xmm0,64(%esp)
+.byte  102,15,56,0,194
+       movups  %xmm6,64(%edi)
+       pshufd  $192,%xmm1,%xmm2
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       movl    %ebx,%ecx
+       pshufd  $128,%xmm1,%xmm3
+       subl    $6,%eax
+       jnc     .L035ctr32_loop6
+       addl    $6,%eax
+       jz      .L036ctr32_ret
+       movl    %ebp,%edx
+       leal    1(,%ecx,2),%ecx
+       movdqa  32(%esp),%xmm7
+.L034ctr32_tail:
+       por     %xmm7,%xmm2
+       cmpl    $2,%eax
+       jb      .L037ctr32_one
+       pshufd  $64,%xmm1,%xmm4
+       por     %xmm7,%xmm3
+       je      .L038ctr32_two
+       pshufd  $192,%xmm0,%xmm5
+       por     %xmm7,%xmm4
+       cmpl    $4,%eax
+       jb      .L039ctr32_three
+       pshufd  $128,%xmm0,%xmm6
+       por     %xmm7,%xmm5
+       je      .L040ctr32_four
+       por     %xmm7,%xmm6
+       call    _aesni_encrypt6
+       movups  (%esi),%xmm1
+       movups  16(%esi),%xmm0
+       xorps   %xmm1,%xmm2
+       movups  32(%esi),%xmm1
+       xorps   %xmm0,%xmm3
+       movups  48(%esi),%xmm0
+       xorps   %xmm1,%xmm4
+       movups  64(%esi),%xmm1
+       xorps   %xmm0,%xmm5
+       movups  %xmm2,(%edi)
+       xorps   %xmm1,%xmm6
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       jmp     .L036ctr32_ret
+.align 16
+.L033ctr32_one_shortcut:
+       movups  (%ebx),%xmm2
+       movl    240(%edx),%ecx
+.L037ctr32_one:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L041enc1_loop_7:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L041enc1_loop_7
+.byte  102,15,56,221,209
+       movups  (%esi),%xmm6
+       xorps   %xmm2,%xmm6
+       movups  %xmm6,(%edi)
+       jmp     .L036ctr32_ret
+.align 16
+.L038ctr32_two:
+       call    _aesni_encrypt3
+       movups  (%esi),%xmm5
+       movups  16(%esi),%xmm6
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       jmp     .L036ctr32_ret
+.align 16
+.L039ctr32_three:
+       call    _aesni_encrypt3
+       movups  (%esi),%xmm5
+       movups  16(%esi),%xmm6
+       xorps   %xmm5,%xmm2
+       movups  32(%esi),%xmm7
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       xorps   %xmm7,%xmm4
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       jmp     .L036ctr32_ret
+.align 16
+.L040ctr32_four:
+       call    _aesni_encrypt4
+       movups  (%esi),%xmm6
+       movups  16(%esi),%xmm7
+       movups  32(%esi),%xmm1
+       xorps   %xmm6,%xmm2
+       movups  48(%esi),%xmm0
+       xorps   %xmm7,%xmm3
+       movups  %xmm2,(%edi)
+       xorps   %xmm1,%xmm4
+       movups  %xmm3,16(%edi)
+       xorps   %xmm0,%xmm5
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+.L036ctr32_ret:
+       movl    80(%esp),%esp
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
+.globl aesni_xts_encrypt
+.type  aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    36(%esp),%edx
+       movl    40(%esp),%esi
+       movl    240(%edx),%ecx
+       movups  (%esi),%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L042enc1_loop_8:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L042enc1_loop_8
+.byte  102,15,56,221,209
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    %esp,%ebp
+       subl    $120,%esp
+       movl    240(%edx),%ecx
+       andl    $-16,%esp
+       movl    $135,96(%esp)
+       movl    $0,100(%esp)
+       movl    $1,104(%esp)
+       movl    $0,108(%esp)
+       movl    %eax,112(%esp)
+       movl    %ebp,116(%esp)
+       movdqa  %xmm2,%xmm1
+       pxor    %xmm0,%xmm0
+       movdqa  96(%esp),%xmm3
+       pcmpgtd %xmm1,%xmm0
+       andl    $-16,%eax
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       subl    $96,%eax
+       jc      .L043xts_enc_short
+       shrl    $1,%ecx
+       movl    %ecx,%ebx
+       jmp     .L044xts_enc_loop6
+.align 16
+.L044xts_enc_loop6:
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,16(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,32(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,48(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm7
+       movdqa  %xmm1,64(%esp)
+       paddq   %xmm1,%xmm1
+       movups  (%ebp),%xmm0
+       pand    %xmm3,%xmm7
+       movups  (%esi),%xmm2
+       pxor    %xmm1,%xmm7
+       movdqu  16(%esi),%xmm3
+       xorps   %xmm0,%xmm2
+       movdqu  32(%esi),%xmm4
+       pxor    %xmm0,%xmm3
+       movdqu  48(%esi),%xmm5
+       pxor    %xmm0,%xmm4
+       movdqu  64(%esi),%xmm6
+       pxor    %xmm0,%xmm5
+       movdqu  80(%esi),%xmm1
+       pxor    %xmm0,%xmm6
+       leal    96(%esi),%esi
+       pxor    (%esp),%xmm2
+       movdqa  %xmm7,80(%esp)
+       pxor    %xmm1,%xmm7
+       movups  16(%ebp),%xmm1
+       leal    32(%ebp),%edx
+       pxor    16(%esp),%xmm3
+.byte  102,15,56,220,209
+       pxor    32(%esp),%xmm4
+.byte  102,15,56,220,217
+       pxor    48(%esp),%xmm5
+       decl    %ecx
+.byte  102,15,56,220,225
+       pxor    64(%esp),%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+.byte  102,15,56,220,241
+       movups  (%edx),%xmm0
+.byte  102,15,56,220,249
+       call    .L_aesni_encrypt6_enter
+       movdqa  80(%esp),%xmm1
+       pxor    %xmm0,%xmm0
+       xorps   (%esp),%xmm2
+       pcmpgtd %xmm1,%xmm0
+       xorps   16(%esp),%xmm3
+       movups  %xmm2,(%edi)
+       xorps   32(%esp),%xmm4
+       movups  %xmm3,16(%edi)
+       xorps   48(%esp),%xmm5
+       movups  %xmm4,32(%edi)
+       xorps   64(%esp),%xmm6
+       movups  %xmm5,48(%edi)
+       xorps   %xmm1,%xmm7
+       movups  %xmm6,64(%edi)
+       pshufd  $19,%xmm0,%xmm2
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       movdqa  96(%esp),%xmm3
+       pxor    %xmm0,%xmm0
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       movl    %ebx,%ecx
+       pxor    %xmm2,%xmm1
+       subl    $96,%eax
+       jnc     .L044xts_enc_loop6
+       leal    1(,%ecx,2),%ecx
+       movl    %ebp,%edx
+       movl    %ecx,%ebx
+.L043xts_enc_short:
+       addl    $96,%eax
+       jz      .L045xts_enc_done6x
+       movdqa  %xmm1,%xmm5
+       cmpl    $32,%eax
+       jb      .L046xts_enc_one
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       je      .L047xts_enc_two
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,%xmm6
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       cmpl    $64,%eax
+       jb      .L048xts_enc_three
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,%xmm7
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       movdqa  %xmm5,(%esp)
+       movdqa  %xmm6,16(%esp)
+       je      .L049xts_enc_four
+       movdqa  %xmm7,32(%esp)
+       pshufd  $19,%xmm0,%xmm7
+       movdqa  %xmm1,48(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm7
+       pxor    %xmm1,%xmm7
+       movdqu  (%esi),%xmm2
+       movdqu  16(%esi),%xmm3
+       movdqu  32(%esi),%xmm4
+       pxor    (%esp),%xmm2
+       movdqu  48(%esi),%xmm5
+       pxor    16(%esp),%xmm3
+       movdqu  64(%esi),%xmm6
+       pxor    32(%esp),%xmm4
+       leal    80(%esi),%esi
+       pxor    48(%esp),%xmm5
+       movdqa  %xmm7,64(%esp)
+       pxor    %xmm7,%xmm6
+       call    _aesni_encrypt6
+       movaps  64(%esp),%xmm1
+       xorps   (%esp),%xmm2
+       xorps   16(%esp),%xmm3
+       xorps   32(%esp),%xmm4
+       movups  %xmm2,(%edi)
+       xorps   48(%esp),%xmm5
+       movups  %xmm3,16(%edi)
+       xorps   %xmm1,%xmm6
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       leal    80(%edi),%edi
+       jmp     .L050xts_enc_done
+.align 16
+.L046xts_enc_one:
+       movups  (%esi),%xmm2
+       leal    16(%esi),%esi
+       xorps   %xmm5,%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L051enc1_loop_9:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L051enc1_loop_9
+.byte  102,15,56,221,209
+       xorps   %xmm5,%xmm2
+       movups  %xmm2,(%edi)
+       leal    16(%edi),%edi
+       movdqa  %xmm5,%xmm1
+       jmp     .L050xts_enc_done
+.align 16
+.L047xts_enc_two:
+       movaps  %xmm1,%xmm6
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       leal    32(%esi),%esi
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm4,%xmm4
+       call    _aesni_encrypt3
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       leal    32(%edi),%edi
+       movdqa  %xmm6,%xmm1
+       jmp     .L050xts_enc_done
+.align 16
+.L048xts_enc_three:
+       movaps  %xmm1,%xmm7
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       movups  32(%esi),%xmm4
+       leal    48(%esi),%esi
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm7,%xmm4
+       call    _aesni_encrypt3
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm7,%xmm4
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       leal    48(%edi),%edi
+       movdqa  %xmm7,%xmm1
+       jmp     .L050xts_enc_done
+.align 16
+.L049xts_enc_four:
+       movaps  %xmm1,%xmm6
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       movups  32(%esi),%xmm4
+       xorps   (%esp),%xmm2
+       movups  48(%esi),%xmm5
+       leal    64(%esi),%esi
+       xorps   16(%esp),%xmm3
+       xorps   %xmm7,%xmm4
+       xorps   %xmm6,%xmm5
+       call    _aesni_encrypt4
+       xorps   (%esp),%xmm2
+       xorps   16(%esp),%xmm3
+       xorps   %xmm7,%xmm4
+       movups  %xmm2,(%edi)
+       xorps   %xmm6,%xmm5
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       leal    64(%edi),%edi
+       movdqa  %xmm6,%xmm1
+       jmp     .L050xts_enc_done
+.align 16
+.L045xts_enc_done6x:
+       movl    112(%esp),%eax
+       andl    $15,%eax
+       jz      .L052xts_enc_ret
+       movdqa  %xmm1,%xmm5
+       movl    %eax,112(%esp)
+       jmp     .L053xts_enc_steal
+.align 16
+.L050xts_enc_done:
+       movl    112(%esp),%eax
+       pxor    %xmm0,%xmm0
+       andl    $15,%eax
+       jz      .L052xts_enc_ret
+       pcmpgtd %xmm1,%xmm0
+       movl    %eax,112(%esp)
+       pshufd  $19,%xmm0,%xmm5
+       paddq   %xmm1,%xmm1
+       pand    96(%esp),%xmm5
+       pxor    %xmm1,%xmm5
+.L053xts_enc_steal:
+       movzbl  (%esi),%ecx
+       movzbl  -16(%edi),%edx
+       leal    1(%esi),%esi
+       movb    %cl,-16(%edi)
+       movb    %dl,(%edi)
+       leal    1(%edi),%edi
+       subl    $1,%eax
+       jnz     .L053xts_enc_steal
+       subl    112(%esp),%edi
+       movl    %ebp,%edx
+       movl    %ebx,%ecx
+       movups  -16(%edi),%xmm2
+       xorps   %xmm5,%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L054enc1_loop_10:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L054enc1_loop_10
+.byte  102,15,56,221,209
+       xorps   %xmm5,%xmm2
+       movups  %xmm2,-16(%edi)
+.L052xts_enc_ret:
+       movl    116(%esp),%esp
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
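+# aesni_xts_decrypt mirrors the encrypt path: the initial tweak is still
+# produced by encrypting the IV with the second key, the data blocks go
+# through aesdec/aesdeclast, and a trailing partial block is recovered with
+# ciphertext stealing (.L068xts_dec_steal).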
+.globl aesni_xts_decrypt
+.type  aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    36(%esp),%edx
+       movl    40(%esp),%esi
+       movl    240(%edx),%ecx
+       movups  (%esi),%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L055enc1_loop_11:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L055enc1_loop_11
+.byte  102,15,56,221,209
+       movl    20(%esp),%esi
+       movl    24(%esp),%edi
+       movl    28(%esp),%eax
+       movl    32(%esp),%edx
+       movl    %esp,%ebp
+       subl    $120,%esp
+       andl    $-16,%esp
+       xorl    %ebx,%ebx
+       testl   $15,%eax
+       setnz   %bl
+       shll    $4,%ebx
+       subl    %ebx,%eax
+       movl    $135,96(%esp)
+       movl    $0,100(%esp)
+       movl    $1,104(%esp)
+       movl    $0,108(%esp)
+       movl    %eax,112(%esp)
+       movl    %ebp,116(%esp)
+       movl    240(%edx),%ecx
+       movl    %edx,%ebp
+       movl    %ecx,%ebx
+       movdqa  %xmm2,%xmm1
+       pxor    %xmm0,%xmm0
+       movdqa  96(%esp),%xmm3
+       pcmpgtd %xmm1,%xmm0
+       andl    $-16,%eax
+       subl    $96,%eax
+       jc      .L056xts_dec_short
+       shrl    $1,%ecx
+       movl    %ecx,%ebx
+       jmp     .L057xts_dec_loop6
+.align 16
+.L057xts_dec_loop6:
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,16(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,32(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,48(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       pshufd  $19,%xmm0,%xmm7
+       movdqa  %xmm1,64(%esp)
+       paddq   %xmm1,%xmm1
+       movups  (%ebp),%xmm0
+       pand    %xmm3,%xmm7
+       movups  (%esi),%xmm2
+       pxor    %xmm1,%xmm7
+       movdqu  16(%esi),%xmm3
+       xorps   %xmm0,%xmm2
+       movdqu  32(%esi),%xmm4
+       pxor    %xmm0,%xmm3
+       movdqu  48(%esi),%xmm5
+       pxor    %xmm0,%xmm4
+       movdqu  64(%esi),%xmm6
+       pxor    %xmm0,%xmm5
+       movdqu  80(%esi),%xmm1
+       pxor    %xmm0,%xmm6
+       leal    96(%esi),%esi
+       pxor    (%esp),%xmm2
+       movdqa  %xmm7,80(%esp)
+       pxor    %xmm1,%xmm7
+       movups  16(%ebp),%xmm1
+       leal    32(%ebp),%edx
+       pxor    16(%esp),%xmm3
+.byte  102,15,56,222,209
+       pxor    32(%esp),%xmm4
+.byte  102,15,56,222,217
+       pxor    48(%esp),%xmm5
+       decl    %ecx
+.byte  102,15,56,222,225
+       pxor    64(%esp),%xmm6
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+.byte  102,15,56,222,241
+       movups  (%edx),%xmm0
+.byte  102,15,56,222,249
+       call    .L_aesni_decrypt6_enter
+       movdqa  80(%esp),%xmm1
+       pxor    %xmm0,%xmm0
+       xorps   (%esp),%xmm2
+       pcmpgtd %xmm1,%xmm0
+       xorps   16(%esp),%xmm3
+       movups  %xmm2,(%edi)
+       xorps   32(%esp),%xmm4
+       movups  %xmm3,16(%edi)
+       xorps   48(%esp),%xmm5
+       movups  %xmm4,32(%edi)
+       xorps   64(%esp),%xmm6
+       movups  %xmm5,48(%edi)
+       xorps   %xmm1,%xmm7
+       movups  %xmm6,64(%edi)
+       pshufd  $19,%xmm0,%xmm2
+       movups  %xmm7,80(%edi)
+       leal    96(%edi),%edi
+       movdqa  96(%esp),%xmm3
+       pxor    %xmm0,%xmm0
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       movl    %ebx,%ecx
+       pxor    %xmm2,%xmm1
+       subl    $96,%eax
+       jnc     .L057xts_dec_loop6
+       leal    1(,%ecx,2),%ecx
+       movl    %ebp,%edx
+       movl    %ecx,%ebx
+.L056xts_dec_short:
+       addl    $96,%eax
+       jz      .L058xts_dec_done6x
+       movdqa  %xmm1,%xmm5
+       cmpl    $32,%eax
+       jb      .L059xts_dec_one
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       je      .L060xts_dec_two
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,%xmm6
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       cmpl    $64,%eax
+       jb      .L061xts_dec_three
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  %xmm1,%xmm7
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+       movdqa  %xmm5,(%esp)
+       movdqa  %xmm6,16(%esp)
+       je      .L062xts_dec_four
+       movdqa  %xmm7,32(%esp)
+       pshufd  $19,%xmm0,%xmm7
+       movdqa  %xmm1,48(%esp)
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm7
+       pxor    %xmm1,%xmm7
+       movdqu  (%esi),%xmm2
+       movdqu  16(%esi),%xmm3
+       movdqu  32(%esi),%xmm4
+       pxor    (%esp),%xmm2
+       movdqu  48(%esi),%xmm5
+       pxor    16(%esp),%xmm3
+       movdqu  64(%esi),%xmm6
+       pxor    32(%esp),%xmm4
+       leal    80(%esi),%esi
+       pxor    48(%esp),%xmm5
+       movdqa  %xmm7,64(%esp)
+       pxor    %xmm7,%xmm6
+       call    _aesni_decrypt6
+       movaps  64(%esp),%xmm1
+       xorps   (%esp),%xmm2
+       xorps   16(%esp),%xmm3
+       xorps   32(%esp),%xmm4
+       movups  %xmm2,(%edi)
+       xorps   48(%esp),%xmm5
+       movups  %xmm3,16(%edi)
+       xorps   %xmm1,%xmm6
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       movups  %xmm6,64(%edi)
+       leal    80(%edi),%edi
+       jmp     .L063xts_dec_done
+.align 16
+.L059xts_dec_one:
+       movups  (%esi),%xmm2
+       leal    16(%esi),%esi
+       xorps   %xmm5,%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L064dec1_loop_12:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L064dec1_loop_12
+.byte  102,15,56,223,209
+       xorps   %xmm5,%xmm2
+       movups  %xmm2,(%edi)
+       leal    16(%edi),%edi
+       movdqa  %xmm5,%xmm1
+       jmp     .L063xts_dec_done
+.align 16
+.L060xts_dec_two:
+       movaps  %xmm1,%xmm6
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       leal    32(%esi),%esi
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       call    _aesni_decrypt3
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       leal    32(%edi),%edi
+       movdqa  %xmm6,%xmm1
+       jmp     .L063xts_dec_done
+.align 16
+.L061xts_dec_three:
+       movaps  %xmm1,%xmm7
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       movups  32(%esi),%xmm4
+       leal    48(%esi),%esi
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm7,%xmm4
+       call    _aesni_decrypt3
+       xorps   %xmm5,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm7,%xmm4
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       leal    48(%edi),%edi
+       movdqa  %xmm7,%xmm1
+       jmp     .L063xts_dec_done
+.align 16
+.L062xts_dec_four:
+       movaps  %xmm1,%xmm6
+       movups  (%esi),%xmm2
+       movups  16(%esi),%xmm3
+       movups  32(%esi),%xmm4
+       xorps   (%esp),%xmm2
+       movups  48(%esi),%xmm5
+       leal    64(%esi),%esi
+       xorps   16(%esp),%xmm3
+       xorps   %xmm7,%xmm4
+       xorps   %xmm6,%xmm5
+       call    _aesni_decrypt4
+       xorps   (%esp),%xmm2
+       xorps   16(%esp),%xmm3
+       xorps   %xmm7,%xmm4
+       movups  %xmm2,(%edi)
+       xorps   %xmm6,%xmm5
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       leal    64(%edi),%edi
+       movdqa  %xmm6,%xmm1
+       jmp     .L063xts_dec_done
+.align 16
+.L058xts_dec_done6x:
+       movl    112(%esp),%eax
+       andl    $15,%eax
+       jz      .L065xts_dec_ret
+       movl    %eax,112(%esp)
+       jmp     .L066xts_dec_only_one_more
+.align 16
+.L063xts_dec_done:
+       movl    112(%esp),%eax
+       pxor    %xmm0,%xmm0
+       andl    $15,%eax
+       jz      .L065xts_dec_ret
+       pcmpgtd %xmm1,%xmm0
+       movl    %eax,112(%esp)
+       pshufd  $19,%xmm0,%xmm2
+       pxor    %xmm0,%xmm0
+       movdqa  96(%esp),%xmm3
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm2
+       pcmpgtd %xmm1,%xmm0
+       pxor    %xmm2,%xmm1
+.L066xts_dec_only_one_more:
+       pshufd  $19,%xmm0,%xmm5
+       movdqa  %xmm1,%xmm6
+       paddq   %xmm1,%xmm1
+       pand    %xmm3,%xmm5
+       pxor    %xmm1,%xmm5
+       movl    %ebp,%edx
+       movl    %ebx,%ecx
+       movups  (%esi),%xmm2
+       xorps   %xmm5,%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L067dec1_loop_13:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L067dec1_loop_13
+.byte  102,15,56,223,209
+       xorps   %xmm5,%xmm2
+       movups  %xmm2,(%edi)
+.L068xts_dec_steal:
+       movzbl  16(%esi),%ecx
+       movzbl  (%edi),%edx
+       leal    1(%esi),%esi
+       movb    %cl,(%edi)
+       movb    %dl,16(%edi)
+       leal    1(%edi),%edi
+       subl    $1,%eax
+       jnz     .L068xts_dec_steal
+       subl    112(%esp),%edi
+       movl    %ebp,%edx
+       movl    %ebx,%ecx
+       movups  (%edi),%xmm2
+       xorps   %xmm6,%xmm2
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L069dec1_loop_14:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L069dec1_loop_14
+.byte  102,15,56,223,209
+       xorps   %xmm6,%xmm2
+       movups  %xmm2,(%edi)
+.L065xts_dec_ret:
+       movl    116(%esp),%esp
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
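+# aesni_cbc_encrypt: the sixth argument (40(%esp)) selects encryption
+# (non-zero) or decryption (zero).  Encryption chains one block at a time; a
+# short final block is copied and zero-padded via the hand-encoded
+# rep movsb/rep stosb data words (.long 2767451785/2868115081).  Decryption
+# runs six blocks per iteration through _aesni_decrypt6.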
+.globl aesni_cbc_encrypt
+.type  aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       movl    20(%esp),%esi
+       movl    %esp,%ebx
+       movl    24(%esp),%edi
+       subl    $24,%ebx
+       movl    28(%esp),%eax
+       andl    $-16,%ebx
+       movl    32(%esp),%edx
+       movl    36(%esp),%ebp
+       testl   %eax,%eax
+       jz      .L070cbc_abort
+       cmpl    $0,40(%esp)
+       xchgl   %esp,%ebx
+       movups  (%ebp),%xmm7
+       movl    240(%edx),%ecx
+       movl    %edx,%ebp
+       movl    %ebx,16(%esp)
+       movl    %ecx,%ebx
+       je      .L071cbc_decrypt
+       movaps  %xmm7,%xmm2
+       cmpl    $16,%eax
+       jb      .L072cbc_enc_tail
+       subl    $16,%eax
+       jmp     .L073cbc_enc_loop
+.align 16
+.L073cbc_enc_loop:
+       movups  (%esi),%xmm7
+       leal    16(%esi),%esi
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       xorps   %xmm0,%xmm7
+       leal    32(%edx),%edx
+       xorps   %xmm7,%xmm2
+.L074enc1_loop_15:
+.byte  102,15,56,220,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L074enc1_loop_15
+.byte  102,15,56,221,209
+       movl    %ebx,%ecx
+       movl    %ebp,%edx
+       movups  %xmm2,(%edi)
+       leal    16(%edi),%edi
+       subl    $16,%eax
+       jnc     .L073cbc_enc_loop
+       addl    $16,%eax
+       jnz     .L072cbc_enc_tail
+       movaps  %xmm2,%xmm7
+       jmp     .L075cbc_ret
+.L072cbc_enc_tail:
+       movl    %eax,%ecx
+.long  2767451785
+       movl    $16,%ecx
+       subl    %eax,%ecx
+       xorl    %eax,%eax
+.long  2868115081
+       leal    -16(%edi),%edi
+       movl    %ebx,%ecx
+       movl    %edi,%esi
+       movl    %ebp,%edx
+       jmp     .L073cbc_enc_loop
+.align 16
+.L071cbc_decrypt:
+       cmpl    $80,%eax
+       jbe     .L076cbc_dec_tail
+       movaps  %xmm7,(%esp)
+       subl    $80,%eax
+       jmp     .L077cbc_dec_loop6_enter
+.align 16
+.L078cbc_dec_loop6:
+       movaps  %xmm0,(%esp)
+       movups  %xmm7,(%edi)
+       leal    16(%edi),%edi
+.L077cbc_dec_loop6_enter:
+       movdqu  (%esi),%xmm2
+       movdqu  16(%esi),%xmm3
+       movdqu  32(%esi),%xmm4
+       movdqu  48(%esi),%xmm5
+       movdqu  64(%esi),%xmm6
+       movdqu  80(%esi),%xmm7
+       call    _aesni_decrypt6
+       movups  (%esi),%xmm1
+       movups  16(%esi),%xmm0
+       xorps   (%esp),%xmm2
+       xorps   %xmm1,%xmm3
+       movups  32(%esi),%xmm1
+       xorps   %xmm0,%xmm4
+       movups  48(%esi),%xmm0
+       xorps   %xmm1,%xmm5
+       movups  64(%esi),%xmm1
+       xorps   %xmm0,%xmm6
+       movups  80(%esi),%xmm0
+       xorps   %xmm1,%xmm7
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       leal    96(%esi),%esi
+       movups  %xmm4,32(%edi)
+       movl    %ebx,%ecx
+       movups  %xmm5,48(%edi)
+       movl    %ebp,%edx
+       movups  %xmm6,64(%edi)
+       leal    80(%edi),%edi
+       subl    $96,%eax
+       ja      .L078cbc_dec_loop6
+       movaps  %xmm7,%xmm2
+       movaps  %xmm0,%xmm7
+       addl    $80,%eax
+       jle     .L079cbc_dec_tail_collected
+       movups  %xmm2,(%edi)
+       leal    16(%edi),%edi
+.L076cbc_dec_tail:
+       movups  (%esi),%xmm2
+       movaps  %xmm2,%xmm6
+       cmpl    $16,%eax
+       jbe     .L080cbc_dec_one
+       movups  16(%esi),%xmm3
+       movaps  %xmm3,%xmm5
+       cmpl    $32,%eax
+       jbe     .L081cbc_dec_two
+       movups  32(%esi),%xmm4
+       cmpl    $48,%eax
+       jbe     .L082cbc_dec_three
+       movups  48(%esi),%xmm5
+       cmpl    $64,%eax
+       jbe     .L083cbc_dec_four
+       movups  64(%esi),%xmm6
+       movaps  %xmm7,(%esp)
+       movups  (%esi),%xmm2
+       xorps   %xmm7,%xmm7
+       call    _aesni_decrypt6
+       movups  (%esi),%xmm1
+       movups  16(%esi),%xmm0
+       xorps   (%esp),%xmm2
+       xorps   %xmm1,%xmm3
+       movups  32(%esi),%xmm1
+       xorps   %xmm0,%xmm4
+       movups  48(%esi),%xmm0
+       xorps   %xmm1,%xmm5
+       movups  64(%esi),%xmm7
+       xorps   %xmm0,%xmm6
+       movups  %xmm2,(%edi)
+       movups  %xmm3,16(%edi)
+       movups  %xmm4,32(%edi)
+       movups  %xmm5,48(%edi)
+       leal    64(%edi),%edi
+       movaps  %xmm6,%xmm2
+       subl    $80,%eax
+       jmp     .L079cbc_dec_tail_collected
+.align 16
+.L080cbc_dec_one:
+       movups  (%edx),%xmm0
+       movups  16(%edx),%xmm1
+       leal    32(%edx),%edx
+       xorps   %xmm0,%xmm2
+.L084dec1_loop_16:
+.byte  102,15,56,222,209
+       decl    %ecx
+       movups  (%edx),%xmm1
+       leal    16(%edx),%edx
+       jnz     .L084dec1_loop_16
+.byte  102,15,56,223,209
+       xorps   %xmm7,%xmm2
+       movaps  %xmm6,%xmm7
+       subl    $16,%eax
+       jmp     .L079cbc_dec_tail_collected
+.align 16
+.L081cbc_dec_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_decrypt3
+       xorps   %xmm7,%xmm2
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       movaps  %xmm3,%xmm2
+       leal    16(%edi),%edi
+       movaps  %xmm5,%xmm7
+       subl    $32,%eax
+       jmp     .L079cbc_dec_tail_collected
+.align 16
+.L082cbc_dec_three:
+       call    _aesni_decrypt3
+       xorps   %xmm7,%xmm2
+       xorps   %xmm6,%xmm3
+       xorps   %xmm5,%xmm4
+       movups  %xmm2,(%edi)
+       movaps  %xmm4,%xmm2
+       movups  %xmm3,16(%edi)
+       leal    32(%edi),%edi
+       movups  32(%esi),%xmm7
+       subl    $48,%eax
+       jmp     .L079cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_four:
+       call    _aesni_decrypt4
+       movups  16(%esi),%xmm1
+       movups  32(%esi),%xmm0
+       xorps   %xmm7,%xmm2
+       movups  48(%esi),%xmm7
+       xorps   %xmm6,%xmm3
+       movups  %xmm2,(%edi)
+       xorps   %xmm1,%xmm4
+       movups  %xmm3,16(%edi)
+       xorps   %xmm0,%xmm5
+       movups  %xmm4,32(%edi)
+       leal    48(%edi),%edi
+       movaps  %xmm5,%xmm2
+       subl    $64,%eax
+.L079cbc_dec_tail_collected:
+       andl    $15,%eax
+       jnz     .L085cbc_dec_tail_partial
+       movups  %xmm2,(%edi)
+       jmp     .L075cbc_ret
+.align 16
+.L085cbc_dec_tail_partial:
+       movaps  %xmm2,(%esp)
+       movl    $16,%ecx
+       movl    %esp,%esi
+       subl    %eax,%ecx
+.long  2767451785
+.L075cbc_ret:
+       movl    16(%esp),%esp
+       movl    36(%esp),%ebp
+       movups  %xmm7,(%ebp)
+.L070cbc_abort:
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+.size  aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
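+# _aesni_set_encrypt_key expands 128-, 192- and 256-bit keys with
+# aeskeygenassist (the 102,15,58,223 byte sequences), returning 0 on success,
+# -1 for a NULL pointer and -2 for an unsupported key size.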
+.type  _aesni_set_encrypt_key,@function
+.align 16
+_aesni_set_encrypt_key:
+       testl   %eax,%eax
+       jz      .L086bad_pointer
+       testl   %edx,%edx
+       jz      .L086bad_pointer
+       movups  (%eax),%xmm0
+       xorps   %xmm4,%xmm4
+       leal    16(%edx),%edx
+       cmpl    $256,%ecx
+       je      .L08714rounds
+       cmpl    $192,%ecx
+       je      .L08812rounds
+       cmpl    $128,%ecx
+       jne     .L089bad_keybits
+.align 16
+.L09010rounds:
+       movl    $9,%ecx
+       movups  %xmm0,-16(%edx)
+.byte  102,15,58,223,200,1
+       call    .L091key_128_cold
+.byte  102,15,58,223,200,2
+       call    .L092key_128
+.byte  102,15,58,223,200,4
+       call    .L092key_128
+.byte  102,15,58,223,200,8
+       call    .L092key_128
+.byte  102,15,58,223,200,16
+       call    .L092key_128
+.byte  102,15,58,223,200,32
+       call    .L092key_128
+.byte  102,15,58,223,200,64
+       call    .L092key_128
+.byte  102,15,58,223,200,128
+       call    .L092key_128
+.byte  102,15,58,223,200,27
+       call    .L092key_128
+.byte  102,15,58,223,200,54
+       call    .L092key_128
+       movups  %xmm0,(%edx)
+       movl    %ecx,80(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L092key_128:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+.L091key_128_cold:
+       shufps  $16,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $255,%xmm1,%xmm1
+       xorps   %xmm1,%xmm0
+       ret
+.align 16
+.L08812rounds:
+       movq    16(%eax),%xmm2
+       movl    $11,%ecx
+       movups  %xmm0,-16(%edx)
+.byte  102,15,58,223,202,1
+       call    .L093key_192a_cold
+.byte  102,15,58,223,202,2
+       call    .L094key_192b
+.byte  102,15,58,223,202,4
+       call    .L095key_192a
+.byte  102,15,58,223,202,8
+       call    .L094key_192b
+.byte  102,15,58,223,202,16
+       call    .L095key_192a
+.byte  102,15,58,223,202,32
+       call    .L094key_192b
+.byte  102,15,58,223,202,64
+       call    .L095key_192a
+.byte  102,15,58,223,202,128
+       call    .L094key_192b
+       movups  %xmm0,(%edx)
+       movl    %ecx,48(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L095key_192a:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+.align 16
+.L093key_192a_cold:
+       movaps  %xmm2,%xmm5
+.L096key_192b_warm:
+       shufps  $16,%xmm0,%xmm4
+       movdqa  %xmm2,%xmm3
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pslldq  $4,%xmm3
+       xorps   %xmm4,%xmm0
+       pshufd  $85,%xmm1,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm1,%xmm0
+       pshufd  $255,%xmm0,%xmm3
+       pxor    %xmm3,%xmm2
+       ret
+.align 16
+.L094key_192b:
+       movaps  %xmm0,%xmm3
+       shufps  $68,%xmm0,%xmm5
+       movups  %xmm5,(%edx)
+       shufps  $78,%xmm2,%xmm3
+       movups  %xmm3,16(%edx)
+       leal    32(%edx),%edx
+       jmp     .L096key_192b_warm
+.align 16
+.L08714rounds:
+       movups  16(%eax),%xmm2
+       movl    $13,%ecx
+       leal    16(%edx),%edx
+       movups  %xmm0,-32(%edx)
+       movups  %xmm2,-16(%edx)
+.byte  102,15,58,223,202,1
+       call    .L097key_256a_cold
+.byte  102,15,58,223,200,1
+       call    .L098key_256b
+.byte  102,15,58,223,202,2
+       call    .L099key_256a
+.byte  102,15,58,223,200,2
+       call    .L098key_256b
+.byte  102,15,58,223,202,4
+       call    .L099key_256a
+.byte  102,15,58,223,200,4
+       call    .L098key_256b
+.byte  102,15,58,223,202,8
+       call    .L099key_256a
+.byte  102,15,58,223,200,8
+       call    .L098key_256b
+.byte  102,15,58,223,202,16
+       call    .L099key_256a
+.byte  102,15,58,223,200,16
+       call    .L098key_256b
+.byte  102,15,58,223,202,32
+       call    .L099key_256a
+.byte  102,15,58,223,200,32
+       call    .L098key_256b
+.byte  102,15,58,223,202,64
+       call    .L099key_256a
+       movups  %xmm0,(%edx)
+       movl    %ecx,16(%edx)
+       xorl    %eax,%eax
+       ret
+.align 16
+.L099key_256a:
+       movups  %xmm2,(%edx)
+       leal    16(%edx),%edx
+.L097key_256a_cold:
+       shufps  $16,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $255,%xmm1,%xmm1
+       xorps   %xmm1,%xmm0
+       ret
+.align 16
+.L098key_256b:
+       movups  %xmm0,(%edx)
+       leal    16(%edx),%edx
+       shufps  $16,%xmm2,%xmm4
+       xorps   %xmm4,%xmm2
+       shufps  $140,%xmm2,%xmm4
+       xorps   %xmm4,%xmm2
+       shufps  $170,%xmm1,%xmm1
+       xorps   %xmm1,%xmm2
+       ret
+.align 4
+.L086bad_pointer:
+       movl    $-1,%eax
+       ret
+.align 4
+.L089bad_keybits:
+       movl    $-2,%eax
+       ret
+.size  _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl aesni_set_encrypt_key
+.type  aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+       movl    4(%esp),%eax
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       call    _aesni_set_encrypt_key
+       ret
+.size  aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl aesni_set_decrypt_key
+.type  aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+       movl    4(%esp),%eax
+       movl    8(%esp),%ecx
+       movl    12(%esp),%edx
+       call    _aesni_set_encrypt_key
+       movl    12(%esp),%edx
+       shll    $4,%ecx
+       testl   %eax,%eax
+       jnz     .L100dec_key_ret
+       leal    16(%edx,%ecx,1),%eax
+       movups  (%edx),%xmm0
+       movups  (%eax),%xmm1
+       movups  %xmm0,(%eax)
+       movups  %xmm1,(%edx)
+       leal    16(%edx),%edx
+       leal    -16(%eax),%eax
+.L101dec_key_inverse:
+       movups  (%edx),%xmm0
+       movups  (%eax),%xmm1
+.byte  102,15,56,219,192
+.byte  102,15,56,219,201
+       leal    16(%edx),%edx
+       leal    -16(%eax),%eax
+       movups  %xmm0,16(%eax)
+       movups  %xmm1,-16(%edx)
+       cmpl    %edx,%eax
+       ja      .L101dec_key_inverse
+       movups  (%edx),%xmm0
+.byte  102,15,56,219,192
+       movups  %xmm0,(%edx)
+       xorl    %eax,%eax
+.L100dec_key_ret:
+       ret
+.size  aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.byte  65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte  83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte  32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte  115,108,46,111,114,103,62,0
diff --git a/secure/lib/libcrypto/asm/aesni-x86_64.s b/secure/lib/libcrypto/asm/aesni-x86_64.s
new file mode 100644 (file)
index 0000000..917c832
--- /dev/null
@@ -0,0 +1,2535 @@
+.text  
+.globl aesni_encrypt
+.type  aesni_encrypt,@function
+.align 16
+aesni_encrypt:
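+# Single-block encrypt: %rdi = input block, %rsi = output block,
+# %rdx = expanded key schedule (round count at offset 240).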
+       movups  (%rdi),%xmm2
+       movl    240(%rdx),%eax
+       movups  (%rdx),%xmm0
+       movups  16(%rdx),%xmm1
+       leaq    32(%rdx),%rdx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_1:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rdx),%xmm1
+       leaq    16(%rdx),%rdx
+       jnz     .Loop_enc1_1    
+.byte  102,15,56,221,209
+       movups  %xmm2,(%rsi)
+       .byte   0xf3,0xc3
+.size  aesni_encrypt,.-aesni_encrypt
+
+.globl aesni_decrypt
+.type  aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+       movups  (%rdi),%xmm2
+       movl    240(%rdx),%eax
+       movups  (%rdx),%xmm0
+       movups  16(%rdx),%xmm1
+       leaq    32(%rdx),%rdx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_2:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rdx),%xmm1
+       leaq    16(%rdx),%rdx
+       jnz     .Loop_dec1_2    
+.byte  102,15,56,223,209
+       movups  %xmm2,(%rsi)
+       .byte   0xf3,0xc3
+.size  aesni_decrypt, .-aesni_decrypt
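+# _aesni_{en,de}crypt{3,4,6,8} are internal helpers that interleave the round
+# loop over several blocks (key schedule in %rcx, blocks in %xmm2 and up) to
+# hide aesenc/aesdec latency; the round count in %eax is halved because each
+# loop iteration performs two rounds.  The .byte 102,15,56,220/221/222/223
+# sequences are hand-encoded aesenc/aesenclast/aesdec/aesdeclast.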
+.type  _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+       xorps   %xmm0,%xmm4
+       movups  (%rcx),%xmm0
+
+.Lenc_loop3:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+       movups  (%rcx),%xmm0
+       jnz     .Lenc_loop3
+
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt3,.-_aesni_encrypt3
+.type  _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+       xorps   %xmm0,%xmm4
+       movups  (%rcx),%xmm0
+
+.Ldec_loop3:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %eax
+.byte  102,15,56,222,225
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,224
+       movups  (%rcx),%xmm0
+       jnz     .Ldec_loop3
+
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt3,.-_aesni_decrypt3
+.type  _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+       xorps   %xmm0,%xmm4
+       xorps   %xmm0,%xmm5
+       movups  (%rcx),%xmm0
+
+.Lenc_loop4:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+       movups  (%rcx),%xmm0
+       jnz     .Lenc_loop4
+
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+.byte  102,15,56,221,232
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt4,.-_aesni_encrypt4
+.type  _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+       xorps   %xmm0,%xmm4
+       xorps   %xmm0,%xmm5
+       movups  (%rcx),%xmm0
+
+.Ldec_loop4:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %eax
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+       movups  (%rcx),%xmm0
+       jnz     .Ldec_loop4
+
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+.byte  102,15,56,223,232
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt4,.-_aesni_decrypt4
+.type  _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,220,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,220,217
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,220,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+       decl    %eax
+.byte  102,15,56,220,241
+       movups  (%rcx),%xmm0
+.byte  102,15,56,220,249
+       jmp     .Lenc_loop6_enter
+.align 16
+.Lenc_loop6:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.Lenc_loop6_enter:
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+       movups  (%rcx),%xmm0
+       jnz     .Lenc_loop6
+
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+.byte  102,15,56,221,232
+.byte  102,15,56,221,240
+.byte  102,15,56,221,248
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt6,.-_aesni_encrypt6
+.type  _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,222,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,222,217
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,222,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+       decl    %eax
+.byte  102,15,56,222,241
+       movups  (%rcx),%xmm0
+.byte  102,15,56,222,249
+       jmp     .Ldec_loop6_enter
+.align 16
+.Ldec_loop6:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %eax
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.Ldec_loop6_enter:
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+.byte  102,15,56,222,240
+.byte  102,15,56,222,248
+       movups  (%rcx),%xmm0
+       jnz     .Ldec_loop6
+
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+.byte  102,15,56,223,232
+.byte  102,15,56,223,240
+.byte  102,15,56,223,248
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt6,.-_aesni_decrypt6
+.type  _aesni_encrypt8,@function
+.align 16
+_aesni_encrypt8:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+.byte  102,15,56,220,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,220,217
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,220,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+       decl    %eax
+.byte  102,15,56,220,241
+       pxor    %xmm0,%xmm8
+.byte  102,15,56,220,249
+       pxor    %xmm0,%xmm9
+       movups  (%rcx),%xmm0
+.byte  102,68,15,56,220,193
+.byte  102,68,15,56,220,201
+       movups  16(%rcx),%xmm1
+       jmp     .Lenc_loop8_enter
+.align 16
+.Lenc_loop8:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.byte  102,68,15,56,220,193
+.byte  102,68,15,56,220,201
+       movups  16(%rcx),%xmm1
+.Lenc_loop8_enter:
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+.byte  102,68,15,56,220,192
+.byte  102,68,15,56,220,200
+       movups  (%rcx),%xmm0
+       jnz     .Lenc_loop8
+
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.byte  102,68,15,56,220,193
+.byte  102,68,15,56,220,201
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+.byte  102,15,56,221,224
+.byte  102,15,56,221,232
+.byte  102,15,56,221,240
+.byte  102,15,56,221,248
+.byte  102,68,15,56,221,192
+.byte  102,68,15,56,221,200
+       .byte   0xf3,0xc3
+.size  _aesni_encrypt8,.-_aesni_encrypt8
+.type  _aesni_decrypt8,@function
+.align 16
+_aesni_decrypt8:
+       movups  (%rcx),%xmm0
+       shrl    $1,%eax
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm0,%xmm3
+.byte  102,15,56,222,209
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,222,217
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,222,225
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+       decl    %eax
+.byte  102,15,56,222,241
+       pxor    %xmm0,%xmm8
+.byte  102,15,56,222,249
+       pxor    %xmm0,%xmm9
+       movups  (%rcx),%xmm0
+.byte  102,68,15,56,222,193
+.byte  102,68,15,56,222,201
+       movups  16(%rcx),%xmm1
+       jmp     .Ldec_loop8_enter
+.align 16
+.Ldec_loop8:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %eax
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.byte  102,68,15,56,222,193
+.byte  102,68,15,56,222,201
+       movups  16(%rcx),%xmm1
+.Ldec_loop8_enter:
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+.byte  102,15,56,222,240
+.byte  102,15,56,222,248
+.byte  102,68,15,56,222,192
+.byte  102,68,15,56,222,200
+       movups  (%rcx),%xmm0
+       jnz     .Ldec_loop8
+
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.byte  102,68,15,56,222,193
+.byte  102,68,15,56,222,201
+.byte  102,15,56,223,208
+.byte  102,15,56,223,216
+.byte  102,15,56,223,224
+.byte  102,15,56,223,232
+.byte  102,15,56,223,240
+.byte  102,15,56,223,248
+.byte  102,68,15,56,223,192
+.byte  102,68,15,56,223,200
+       .byte   0xf3,0xc3
+.size  _aesni_decrypt8,.-_aesni_decrypt8
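+# ECB entry point: the bulk loop handles eight blocks per iteration with a
+# switch for 1-7 leftover blocks; %r8d selects encryption (non-zero) or
+# decryption (zero).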
+.globl aesni_ecb_encrypt
+.type  aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+       andq    $-16,%rdx
+       jz      .Lecb_ret
+
+       movl    240(%rcx),%eax
+       movups  (%rcx),%xmm0
+       movq    %rcx,%r11
+       movl    %eax,%r10d
+       testl   %r8d,%r8d
+       jz      .Lecb_decrypt
+
+       cmpq    $128,%rdx
+       jb      .Lecb_enc_tail
+
+       movdqu  (%rdi),%xmm2
+       movdqu  16(%rdi),%xmm3
+       movdqu  32(%rdi),%xmm4
+       movdqu  48(%rdi),%xmm5
+       movdqu  64(%rdi),%xmm6
+       movdqu  80(%rdi),%xmm7
+       movdqu  96(%rdi),%xmm8
+       movdqu  112(%rdi),%xmm9
+       leaq    128(%rdi),%rdi
+       subq    $128,%rdx
+       jmp     .Lecb_enc_loop8_enter
+.align 16
+.Lecb_enc_loop8:
+       movups  %xmm2,(%rsi)
+       movq    %r11,%rcx
+       movdqu  (%rdi),%xmm2
+       movl    %r10d,%eax
+       movups  %xmm3,16(%rsi)
+       movdqu  16(%rdi),%xmm3
+       movups  %xmm4,32(%rsi)
+       movdqu  32(%rdi),%xmm4
+       movups  %xmm5,48(%rsi)
+       movdqu  48(%rdi),%xmm5
+       movups  %xmm6,64(%rsi)
+       movdqu  64(%rdi),%xmm6
+       movups  %xmm7,80(%rsi)
+       movdqu  80(%rdi),%xmm7
+       movups  %xmm8,96(%rsi)
+       movdqu  96(%rdi),%xmm8
+       movups  %xmm9,112(%rsi)
+       leaq    128(%rsi),%rsi
+       movdqu  112(%rdi),%xmm9
+       leaq    128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+       call    _aesni_encrypt8
+
+       subq    $128,%rdx
+       jnc     .Lecb_enc_loop8
+
+       movups  %xmm2,(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm3,16(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       movups  %xmm8,96(%rsi)
+       movups  %xmm9,112(%rsi)
+       leaq    128(%rsi),%rsi
+       addq    $128,%rdx
+       jz      .Lecb_ret
+
+.Lecb_enc_tail:
+       movups  (%rdi),%xmm2
+       cmpq    $32,%rdx
+       jb      .Lecb_enc_one
+       movups  16(%rdi),%xmm3
+       je      .Lecb_enc_two
+       movups  32(%rdi),%xmm4
+       cmpq    $64,%rdx
+       jb      .Lecb_enc_three
+       movups  48(%rdi),%xmm5
+       je      .Lecb_enc_four
+       movups  64(%rdi),%xmm6
+       cmpq    $96,%rdx
+       jb      .Lecb_enc_five
+       movups  80(%rdi),%xmm7
+       je      .Lecb_enc_six
+       movdqu  96(%rdi),%xmm8
+       call    _aesni_encrypt8
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       movups  %xmm8,96(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_one:
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_3:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_3    
+.byte  102,15,56,221,209
+       movups  %xmm2,(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_encrypt3
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_three:
+       call    _aesni_encrypt3
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_four:
+       call    _aesni_encrypt4
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_five:
+       xorps   %xmm7,%xmm7
+       call    _aesni_encrypt6
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_enc_six:
+       call    _aesni_encrypt6
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       jmp     .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+       cmpq    $128,%rdx
+       jb      .Lecb_dec_tail
+
+       movdqu  (%rdi),%xmm2
+       movdqu  16(%rdi),%xmm3
+       movdqu  32(%rdi),%xmm4
+       movdqu  48(%rdi),%xmm5
+       movdqu  64(%rdi),%xmm6
+       movdqu  80(%rdi),%xmm7
+       movdqu  96(%rdi),%xmm8
+       movdqu  112(%rdi),%xmm9
+       leaq    128(%rdi),%rdi
+       subq    $128,%rdx
+       jmp     .Lecb_dec_loop8_enter
+.align 16
+.Lecb_dec_loop8:
+       movups  %xmm2,(%rsi)
+       movq    %r11,%rcx
+       movdqu  (%rdi),%xmm2
+       movl    %r10d,%eax
+       movups  %xmm3,16(%rsi)
+       movdqu  16(%rdi),%xmm3
+       movups  %xmm4,32(%rsi)
+       movdqu  32(%rdi),%xmm4
+       movups  %xmm5,48(%rsi)
+       movdqu  48(%rdi),%xmm5
+       movups  %xmm6,64(%rsi)
+       movdqu  64(%rdi),%xmm6
+       movups  %xmm7,80(%rsi)
+       movdqu  80(%rdi),%xmm7
+       movups  %xmm8,96(%rsi)
+       movdqu  96(%rdi),%xmm8
+       movups  %xmm9,112(%rsi)
+       leaq    128(%rsi),%rsi
+       movdqu  112(%rdi),%xmm9
+       leaq    128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+       call    _aesni_decrypt8
+
+       movups  (%r11),%xmm0
+       subq    $128,%rdx
+       jnc     .Lecb_dec_loop8
+
+       movups  %xmm2,(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm3,16(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       movups  %xmm8,96(%rsi)
+       movups  %xmm9,112(%rsi)
+       leaq    128(%rsi),%rsi
+       addq    $128,%rdx
+       jz      .Lecb_ret
+
+.Lecb_dec_tail:
+       movups  (%rdi),%xmm2
+       cmpq    $32,%rdx
+       jb      .Lecb_dec_one
+       movups  16(%rdi),%xmm3
+       je      .Lecb_dec_two
+       movups  32(%rdi),%xmm4
+       cmpq    $64,%rdx
+       jb      .Lecb_dec_three
+       movups  48(%rdi),%xmm5
+       je      .Lecb_dec_four
+       movups  64(%rdi),%xmm6
+       cmpq    $96,%rdx
+       jb      .Lecb_dec_five
+       movups  80(%rdi),%xmm7
+       je      .Lecb_dec_six
+       movups  96(%rdi),%xmm8
+       movups  (%rcx),%xmm0
+       call    _aesni_decrypt8
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       movups  %xmm8,96(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_one:
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_4:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_4    
+.byte  102,15,56,223,209
+       movups  %xmm2,(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_decrypt3
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_three:
+       call    _aesni_decrypt3
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_four:
+       call    _aesni_decrypt4
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_five:
+       xorps   %xmm7,%xmm7
+       call    _aesni_decrypt6
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       jmp     .Lecb_ret
+.align 16
+.Lecb_dec_six:
+       call    _aesni_decrypt6
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+
+.Lecb_ret:
+       .byte   0xf3,0xc3
+.size  aesni_ecb_encrypt,.-aesni_ecb_encrypt
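+# aesni_ccm64_{en,de}crypt_blocks: each iteration pushes the counter block
+# (%xmm2) and the running CBC-MAC (%xmm3) through the cipher together,
+# stepping the counter with .Lincrement64 and byte-swapping it via
+# .Lbswap_mask.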
+.globl aesni_ccm64_encrypt_blocks
+.type  aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+       movl    240(%rcx),%eax
+       movdqu  (%r8),%xmm9
+       movdqa  .Lincrement64(%rip),%xmm6
+       movdqa  .Lbswap_mask(%rip),%xmm7
+
+       shrl    $1,%eax
+       leaq    0(%rcx),%r11
+       movdqu  (%r9),%xmm3
+       movdqa  %xmm9,%xmm2
+       movl    %eax,%r10d
+.byte  102,68,15,56,0,207
+       jmp     .Lccm64_enc_outer
+.align 16
+.Lccm64_enc_outer:
+       movups  (%r11),%xmm0
+       movl    %r10d,%eax
+       movups  (%rdi),%xmm8
+
+       xorps   %xmm0,%xmm2
+       movups  16(%r11),%xmm1
+       xorps   %xmm8,%xmm0
+       leaq    32(%r11),%rcx
+       xorps   %xmm0,%xmm3
+       movups  (%rcx),%xmm0
+
+.Lccm64_enc2_loop:
+.byte  102,15,56,220,209
+       decl    %eax
+.byte  102,15,56,220,217
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,216
+       movups  0(%rcx),%xmm0
+       jnz     .Lccm64_enc2_loop
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       paddq   %xmm6,%xmm9
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+
+       decq    %rdx
+       leaq    16(%rdi),%rdi
+       xorps   %xmm2,%xmm8
+       movdqa  %xmm9,%xmm2
+       movups  %xmm8,(%rsi)
+       leaq    16(%rsi),%rsi
+.byte  102,15,56,0,215
+       jnz     .Lccm64_enc_outer
+
+       movups  %xmm3,(%r9)
+       .byte   0xf3,0xc3
+.size  aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks
+.type  aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+       movl    240(%rcx),%eax
+       movups  (%r8),%xmm9
+       movdqu  (%r9),%xmm3
+       movdqa  .Lincrement64(%rip),%xmm6
+       movdqa  .Lbswap_mask(%rip),%xmm7
+
+       movaps  %xmm9,%xmm2
+       movl    %eax,%r10d
+       movq    %rcx,%r11
+.byte  102,68,15,56,0,207
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_5:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_5    
+.byte  102,15,56,221,209
+       movups  (%rdi),%xmm8
+       paddq   %xmm6,%xmm9
+       leaq    16(%rdi),%rdi
+       jmp     .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
+       xorps   %xmm2,%xmm8
+       movdqa  %xmm9,%xmm2
+       movl    %r10d,%eax
+       movups  %xmm8,(%rsi)
+       leaq    16(%rsi),%rsi
+.byte  102,15,56,0,215
+
+       subq    $1,%rdx
+       jz      .Lccm64_dec_break
+
+       movups  (%r11),%xmm0
+       shrl    $1,%eax
+       movups  16(%r11),%xmm1
+       xorps   %xmm0,%xmm8
+       leaq    32(%r11),%rcx
+       xorps   %xmm0,%xmm2
+       xorps   %xmm8,%xmm3
+       movups  (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+.byte  102,15,56,220,209
+       decl    %eax
+.byte  102,15,56,220,217
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,216
+       movups  0(%rcx),%xmm0
+       jnz     .Lccm64_dec2_loop
+       movups  (%rdi),%xmm8
+       paddq   %xmm6,%xmm9
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       leaq    16(%rdi),%rdi
+.byte  102,15,56,221,208
+.byte  102,15,56,221,216
+       jmp     .Lccm64_dec_outer
+
+.align 16
+.Lccm64_dec_break:
+
+       movups  (%r11),%xmm0
+       movups  16(%r11),%xmm1
+       xorps   %xmm0,%xmm8
+       leaq    32(%r11),%r11
+       xorps   %xmm8,%xmm3
+.Loop_enc1_6:
+.byte  102,15,56,220,217
+       decl    %eax
+       movups  (%r11),%xmm1
+       leaq    16(%r11),%r11
+       jnz     .Loop_enc1_6    
+.byte  102,15,56,221,217
+       movups  %xmm3,(%r9)
+       .byte   0xf3,0xc3
+.size  aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
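+# CTR mode with a 32-bit big-endian counter in the last word of the IV: six
+# counter blocks are built per iteration (pinsrd/pshufb on .Lbswap_mask),
+# encrypted in parallel and XORed with the input.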
+.globl aesni_ctr32_encrypt_blocks
+.type  aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+       cmpq    $1,%rdx
+       je      .Lctr32_one_shortcut
+
+       movdqu  (%r8),%xmm14
+       movdqa  .Lbswap_mask(%rip),%xmm15
+       xorl    %eax,%eax
+.byte  102,69,15,58,22,242,3
+.byte  102,68,15,58,34,240,3
+
+       movl    240(%rcx),%eax
+       bswapl  %r10d
+       pxor    %xmm12,%xmm12
+       pxor    %xmm13,%xmm13
+.byte  102,69,15,58,34,226,0
+       leaq    3(%r10),%r11
+.byte  102,69,15,58,34,235,0
+       incl    %r10d
+.byte  102,69,15,58,34,226,1
+       incq    %r11
+.byte  102,69,15,58,34,235,1
+       incl    %r10d
+.byte  102,69,15,58,34,226,2
+       incq    %r11
+.byte  102,69,15,58,34,235,2
+       movdqa  %xmm12,-40(%rsp)
+.byte  102,69,15,56,0,231
+       movdqa  %xmm13,-24(%rsp)
+.byte  102,69,15,56,0,239
+
+       pshufd  $192,%xmm12,%xmm2
+       pshufd  $128,%xmm12,%xmm3
+       pshufd  $64,%xmm12,%xmm4
+       cmpq    $6,%rdx
+       jb      .Lctr32_tail
+       shrl    $1,%eax
+       movq    %rcx,%r11
+       movl    %eax,%r10d
+       subq    $6,%rdx
+       jmp     .Lctr32_loop6
+
+.align 16
+.Lctr32_loop6:
+       pshufd  $192,%xmm13,%xmm5
+       por     %xmm14,%xmm2
+       movups  (%r11),%xmm0
+       pshufd  $128,%xmm13,%xmm6
+       por     %xmm14,%xmm3
+       movups  16(%r11),%xmm1
+       pshufd  $64,%xmm13,%xmm7
+       por     %xmm14,%xmm4
+       por     %xmm14,%xmm5
+       xorps   %xmm0,%xmm2
+       por     %xmm14,%xmm6
+       por     %xmm14,%xmm7
+
+
+
+
+       pxor    %xmm0,%xmm3
+.byte  102,15,56,220,209
+       leaq    32(%r11),%rcx
+       pxor    %xmm0,%xmm4
+.byte  102,15,56,220,217
+       movdqa  .Lincrement32(%rip),%xmm13
+       pxor    %xmm0,%xmm5
+.byte  102,15,56,220,225
+       movdqa  -40(%rsp),%xmm12
+       pxor    %xmm0,%xmm6
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+       movups  (%rcx),%xmm0
+       decl    %eax
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+       jmp     .Lctr32_enc_loop6_enter
+.align 16
+.Lctr32_enc_loop6:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.Lctr32_enc_loop6_enter:
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+       movups  (%rcx),%xmm0
+       jnz     .Lctr32_enc_loop6
+
+.byte  102,15,56,220,209
+       paddd   %xmm13,%xmm12
+.byte  102,15,56,220,217
+       paddd   -24(%rsp),%xmm13
+.byte  102,15,56,220,225
+       movdqa  %xmm12,-40(%rsp)
+.byte  102,15,56,220,233
+       movdqa  %xmm13,-24(%rsp)
+.byte  102,15,56,220,241
+.byte  102,69,15,56,0,231
+.byte  102,15,56,220,249
+.byte  102,69,15,56,0,239
+
+.byte  102,15,56,221,208
+       movups  (%rdi),%xmm8
+.byte  102,15,56,221,216
+       movups  16(%rdi),%xmm9
+.byte  102,15,56,221,224
+       movups  32(%rdi),%xmm10
+.byte  102,15,56,221,232
+       movups  48(%rdi),%xmm11
+.byte  102,15,56,221,240
+       movups  64(%rdi),%xmm1
+.byte  102,15,56,221,248
+       movups  80(%rdi),%xmm0
+       leaq    96(%rdi),%rdi
+
+       xorps   %xmm2,%xmm8
+       pshufd  $192,%xmm12,%xmm2
+       xorps   %xmm3,%xmm9
+       pshufd  $128,%xmm12,%xmm3
+       movups  %xmm8,(%rsi)
+       xorps   %xmm4,%xmm10
+       pshufd  $64,%xmm12,%xmm4
+       movups  %xmm9,16(%rsi)
+       xorps   %xmm5,%xmm11
+       movups  %xmm10,32(%rsi)
+       xorps   %xmm6,%xmm1
+       movups  %xmm11,48(%rsi)
+       xorps   %xmm7,%xmm0
+       movups  %xmm1,64(%rsi)
+       movups  %xmm0,80(%rsi)
+       leaq    96(%rsi),%rsi
+       movl    %r10d,%eax
+       subq    $6,%rdx
+       jnc     .Lctr32_loop6
+
+       addq    $6,%rdx
+       jz      .Lctr32_done
+       movq    %r11,%rcx
+       leal    1(%rax,%rax,1),%eax
+
+.Lctr32_tail:
+       por     %xmm14,%xmm2
+       movups  (%rdi),%xmm8
+       cmpq    $2,%rdx
+       jb      .Lctr32_one
+
+       por     %xmm14,%xmm3
+       movups  16(%rdi),%xmm9
+       je      .Lctr32_two
+
+       pshufd  $192,%xmm13,%xmm5
+       por     %xmm14,%xmm4
+       movups  32(%rdi),%xmm10
+       cmpq    $4,%rdx
+       jb      .Lctr32_three
+
+       pshufd  $128,%xmm13,%xmm6
+       por     %xmm14,%xmm5
+       movups  48(%rdi),%xmm11
+       je      .Lctr32_four
+
+       por     %xmm14,%xmm6
+       xorps   %xmm7,%xmm7
+
+       call    _aesni_encrypt6
+
+       movups  64(%rdi),%xmm1
+       xorps   %xmm2,%xmm8
+       xorps   %xmm3,%xmm9
+       movups  %xmm8,(%rsi)
+       xorps   %xmm4,%xmm10
+       movups  %xmm9,16(%rsi)
+       xorps   %xmm5,%xmm11
+       movups  %xmm10,32(%rsi)
+       xorps   %xmm6,%xmm1
+       movups  %xmm11,48(%rsi)
+       movups  %xmm1,64(%rsi)
+       jmp     .Lctr32_done
+
+.align 16
+.Lctr32_one_shortcut:
+       movups  (%r8),%xmm2
+       movups  (%rdi),%xmm8
+       movl    240(%rcx),%eax
+.Lctr32_one:
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_7:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_7    
+.byte  102,15,56,221,209
+       xorps   %xmm2,%xmm8
+       movups  %xmm8,(%rsi)
+       jmp     .Lctr32_done
+
+.align 16
+.Lctr32_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_encrypt3
+       xorps   %xmm2,%xmm8
+       xorps   %xmm3,%xmm9
+       movups  %xmm8,(%rsi)
+       movups  %xmm9,16(%rsi)
+       jmp     .Lctr32_done
+
+.align 16
+.Lctr32_three:
+       call    _aesni_encrypt3
+       xorps   %xmm2,%xmm8
+       xorps   %xmm3,%xmm9
+       movups  %xmm8,(%rsi)
+       xorps   %xmm4,%xmm10
+       movups  %xmm9,16(%rsi)
+       movups  %xmm10,32(%rsi)
+       jmp     .Lctr32_done
+
+.align 16
+.Lctr32_four:
+       call    _aesni_encrypt4
+       xorps   %xmm2,%xmm8
+       xorps   %xmm3,%xmm9
+       movups  %xmm8,(%rsi)
+       xorps   %xmm4,%xmm10
+       movups  %xmm9,16(%rsi)
+       xorps   %xmm5,%xmm11
+       movups  %xmm10,32(%rsi)
+       movups  %xmm11,48(%rsi)
+
+.Lctr32_done:
+       .byte   0xf3,0xc3
+.size  aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt
+.type  aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+       leaq    -104(%rsp),%rsp
+       movups  (%r9),%xmm15
+       movl    240(%r8),%eax
+       movl    240(%rcx),%r10d
+       movups  (%r8),%xmm0
+       movups  16(%r8),%xmm1
+       leaq    32(%r8),%r8
+       xorps   %xmm0,%xmm15
+.Loop_enc1_8:
+.byte  102,68,15,56,220,249
+       decl    %eax
+       movups  (%r8),%xmm1
+       leaq    16(%r8),%r8
+       jnz     .Loop_enc1_8    
+.byte  102,68,15,56,221,249
+       movq    %rcx,%r11
+       movl    %r10d,%eax
+       movq    %rdx,%r9
+       andq    $-16,%rdx
+
+       movdqa  .Lxts_magic(%rip),%xmm8
+       pxor    %xmm14,%xmm14
+       pcmpgtd %xmm15,%xmm14
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm10
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm11
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm12
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm13
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       subq    $96,%rdx
+       jc      .Lxts_enc_short
+
+       shrl    $1,%eax
+       subl    $1,%eax
+       movl    %eax,%r10d
+       jmp     .Lxts_enc_grandloop
+
+.align 16
+.Lxts_enc_grandloop:
+       pshufd  $19,%xmm14,%xmm9
+       movdqa  %xmm15,%xmm14
+       paddq   %xmm15,%xmm15
+       movdqu  0(%rdi),%xmm2
+       pand    %xmm8,%xmm9
+       movdqu  16(%rdi),%xmm3
+       pxor    %xmm9,%xmm15
+
+       movdqu  32(%rdi),%xmm4
+       pxor    %xmm10,%xmm2
+       movdqu  48(%rdi),%xmm5
+       pxor    %xmm11,%xmm3
+       movdqu  64(%rdi),%xmm6
+       pxor    %xmm12,%xmm4
+       movdqu  80(%rdi),%xmm7
+       leaq    96(%rdi),%rdi
+       pxor    %xmm13,%xmm5
+       movups  (%r11),%xmm0
+       pxor    %xmm14,%xmm6
+       pxor    %xmm15,%xmm7
+
+
+
+       movups  16(%r11),%xmm1
+       pxor    %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       movdqa  %xmm10,0(%rsp)
+.byte  102,15,56,220,209
+       leaq    32(%r11),%rcx
+       pxor    %xmm0,%xmm4
+       movdqa  %xmm11,16(%rsp)
+.byte  102,15,56,220,217
+       pxor    %xmm0,%xmm5
+       movdqa  %xmm12,32(%rsp)
+.byte  102,15,56,220,225
+       pxor    %xmm0,%xmm6
+       movdqa  %xmm13,48(%rsp)
+.byte  102,15,56,220,233
+       pxor    %xmm0,%xmm7
+       movups  (%rcx),%xmm0
+       decl    %eax
+       movdqa  %xmm14,64(%rsp)
+.byte  102,15,56,220,241
+       movdqa  %xmm15,80(%rsp)
+.byte  102,15,56,220,249
+       pxor    %xmm14,%xmm14
+       pcmpgtd %xmm15,%xmm14
+       jmp     .Lxts_enc_loop6_enter
+
+.align 16
+.Lxts_enc_loop6:
+.byte  102,15,56,220,209
+.byte  102,15,56,220,217
+       decl    %eax
+.byte  102,15,56,220,225
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+.Lxts_enc_loop6_enter:
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,220,208
+.byte  102,15,56,220,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,220,224
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+       movups  (%rcx),%xmm0
+       jnz     .Lxts_enc_loop6
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,220,209
+       pand    %xmm8,%xmm9
+.byte  102,15,56,220,217
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,220,225
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+       movups  16(%rcx),%xmm1
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm10
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,220,208
+       pand    %xmm8,%xmm9
+.byte  102,15,56,220,216
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,220,224
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,220,232
+.byte  102,15,56,220,240
+.byte  102,15,56,220,248
+       movups  32(%rcx),%xmm0
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm11
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,220,209
+       pand    %xmm8,%xmm9
+.byte  102,15,56,220,217
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,220,225
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,220,233
+.byte  102,15,56,220,241
+.byte  102,15,56,220,249
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm12
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,221,208
+       pand    %xmm8,%xmm9
+.byte  102,15,56,221,216
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,221,224
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,221,232
+.byte  102,15,56,221,240
+.byte  102,15,56,221,248
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm13
+       paddq   %xmm15,%xmm15
+       xorps   0(%rsp),%xmm2
+       pand    %xmm8,%xmm9
+       xorps   16(%rsp),%xmm3
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+
+       xorps   32(%rsp),%xmm4
+       movups  %xmm2,0(%rsi)
+       xorps   48(%rsp),%xmm5
+       movups  %xmm3,16(%rsi)
+       xorps   64(%rsp),%xmm6
+       movups  %xmm4,32(%rsi)
+       xorps   80(%rsp),%xmm7
+       movups  %xmm5,48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       leaq    96(%rsi),%rsi
+       subq    $96,%rdx
+       jnc     .Lxts_enc_grandloop
+
+       leal    3(%rax,%rax,1),%eax
+       movq    %r11,%rcx
+       movl    %eax,%r10d
+
+.Lxts_enc_short:
+       addq    $96,%rdx
+       jz      .Lxts_enc_done
+
+       cmpq    $32,%rdx
+       jb      .Lxts_enc_one
+       je      .Lxts_enc_two
+
+       cmpq    $64,%rdx
+       jb      .Lxts_enc_three
+       je      .Lxts_enc_four
+
+       pshufd  $19,%xmm14,%xmm9
+       movdqa  %xmm15,%xmm14
+       paddq   %xmm15,%xmm15
+       movdqu  (%rdi),%xmm2
+       pand    %xmm8,%xmm9
+       movdqu  16(%rdi),%xmm3
+       pxor    %xmm9,%xmm15
+
+       movdqu  32(%rdi),%xmm4
+       pxor    %xmm10,%xmm2
+       movdqu  48(%rdi),%xmm5
+       pxor    %xmm11,%xmm3
+       movdqu  64(%rdi),%xmm6
+       leaq    80(%rdi),%rdi
+       pxor    %xmm12,%xmm4
+       pxor    %xmm13,%xmm5
+       pxor    %xmm14,%xmm6
+
+       call    _aesni_encrypt6
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm15,%xmm10
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       movdqu  %xmm2,(%rsi)
+       xorps   %xmm13,%xmm5
+       movdqu  %xmm3,16(%rsi)
+       xorps   %xmm14,%xmm6
+       movdqu  %xmm4,32(%rsi)
+       movdqu  %xmm5,48(%rsi)
+       movdqu  %xmm6,64(%rsi)
+       leaq    80(%rsi),%rsi
+       jmp     .Lxts_enc_done
+
+.align 16
+.Lxts_enc_one:
+       movups  (%rdi),%xmm2
+       leaq    16(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_9:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_9    
+.byte  102,15,56,221,209
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm11,%xmm10
+       movups  %xmm2,(%rsi)
+       leaq    16(%rsi),%rsi
+       jmp     .Lxts_enc_done
+
+.align 16
+.Lxts_enc_two:
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       leaq    32(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       xorps   %xmm11,%xmm3
+
+       call    _aesni_encrypt3
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm12,%xmm10
+       xorps   %xmm11,%xmm3
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       leaq    32(%rsi),%rsi
+       jmp     .Lxts_enc_done
+
+.align 16
+.Lxts_enc_three:
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       movups  32(%rdi),%xmm4
+       leaq    48(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+
+       call    _aesni_encrypt3
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm13,%xmm10
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       leaq    48(%rsi),%rsi
+       jmp     .Lxts_enc_done
+
+.align 16
+.Lxts_enc_four:
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       movups  32(%rdi),%xmm4
+       xorps   %xmm10,%xmm2
+       movups  48(%rdi),%xmm5
+       leaq    64(%rdi),%rdi
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       xorps   %xmm13,%xmm5
+
+       call    _aesni_encrypt4
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm15,%xmm10
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       movups  %xmm2,(%rsi)
+       xorps   %xmm13,%xmm5
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       leaq    64(%rsi),%rsi
+       jmp     .Lxts_enc_done
+
+.align 16
+.Lxts_enc_done:
+       andq    $15,%r9
+       jz      .Lxts_enc_ret
+       movq    %r9,%rdx
+
+.Lxts_enc_steal:
+       movzbl  (%rdi),%eax
+       movzbl  -16(%rsi),%ecx
+       leaq    1(%rdi),%rdi
+       movb    %al,-16(%rsi)
+       movb    %cl,0(%rsi)
+       leaq    1(%rsi),%rsi
+       subq    $1,%rdx
+       jnz     .Lxts_enc_steal
+
+       subq    %r9,%rsi
+       movq    %r11,%rcx
+       movl    %r10d,%eax
+
+       movups  -16(%rsi),%xmm2
+       xorps   %xmm10,%xmm2
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_enc1_10:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_10   
+.byte  102,15,56,221,209
+       xorps   %xmm10,%xmm2
+       movups  %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+       leaq    104(%rsp),%rsp
+.Lxts_enc_epilogue:
+       .byte   0xf3,0xc3
+.size  aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt
+.type  aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+       leaq    -104(%rsp),%rsp
+       movups  (%r9),%xmm15
+       movl    240(%r8),%eax
+       movl    240(%rcx),%r10d
+       movups  (%r8),%xmm0
+       movups  16(%r8),%xmm1
+       leaq    32(%r8),%r8
+       xorps   %xmm0,%xmm15
+.Loop_enc1_11:
+.byte  102,68,15,56,220,249
+       decl    %eax
+       movups  (%r8),%xmm1
+       leaq    16(%r8),%r8
+       jnz     .Loop_enc1_11   
+.byte  102,68,15,56,221,249
+       xorl    %eax,%eax
+       testq   $15,%rdx
+       setnz   %al
+       shlq    $4,%rax
+       subq    %rax,%rdx
+
+       movq    %rcx,%r11
+       movl    %r10d,%eax
+       movq    %rdx,%r9
+       andq    $-16,%rdx
+
+       movdqa  .Lxts_magic(%rip),%xmm8
+       pxor    %xmm14,%xmm14
+       pcmpgtd %xmm15,%xmm14
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm10
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm11
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm12
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm13
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm9
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+       subq    $96,%rdx
+       jc      .Lxts_dec_short
+
+       shrl    $1,%eax
+       subl    $1,%eax
+       movl    %eax,%r10d
+       jmp     .Lxts_dec_grandloop
+
+.align 16
+.Lxts_dec_grandloop:
+       pshufd  $19,%xmm14,%xmm9
+       movdqa  %xmm15,%xmm14
+       paddq   %xmm15,%xmm15
+       movdqu  0(%rdi),%xmm2
+       pand    %xmm8,%xmm9
+       movdqu  16(%rdi),%xmm3
+       pxor    %xmm9,%xmm15
+
+       movdqu  32(%rdi),%xmm4
+       pxor    %xmm10,%xmm2
+       movdqu  48(%rdi),%xmm5
+       pxor    %xmm11,%xmm3
+       movdqu  64(%rdi),%xmm6
+       pxor    %xmm12,%xmm4
+       movdqu  80(%rdi),%xmm7
+       leaq    96(%rdi),%rdi
+       pxor    %xmm13,%xmm5
+       movups  (%r11),%xmm0
+       pxor    %xmm14,%xmm6
+       pxor    %xmm15,%xmm7
+
+
+
+       movups  16(%r11),%xmm1
+       pxor    %xmm0,%xmm2
+       pxor    %xmm0,%xmm3
+       movdqa  %xmm10,0(%rsp)
+.byte  102,15,56,222,209
+       leaq    32(%r11),%rcx
+       pxor    %xmm0,%xmm4
+       movdqa  %xmm11,16(%rsp)
+.byte  102,15,56,222,217
+       pxor    %xmm0,%xmm5
+       movdqa  %xmm12,32(%rsp)
+.byte  102,15,56,222,225
+       pxor    %xmm0,%xmm6
+       movdqa  %xmm13,48(%rsp)
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+       movups  (%rcx),%xmm0
+       decl    %eax
+       movdqa  %xmm14,64(%rsp)
+.byte  102,15,56,222,241
+       movdqa  %xmm15,80(%rsp)
+.byte  102,15,56,222,249
+       pxor    %xmm14,%xmm14
+       pcmpgtd %xmm15,%xmm14
+       jmp     .Lxts_dec_loop6_enter
+
+.align 16
+.Lxts_dec_loop6:
+.byte  102,15,56,222,209
+.byte  102,15,56,222,217
+       decl    %eax
+.byte  102,15,56,222,225
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+.Lxts_dec_loop6_enter:
+       movups  16(%rcx),%xmm1
+.byte  102,15,56,222,208
+.byte  102,15,56,222,216
+       leaq    32(%rcx),%rcx
+.byte  102,15,56,222,224
+.byte  102,15,56,222,232
+.byte  102,15,56,222,240
+.byte  102,15,56,222,248
+       movups  (%rcx),%xmm0
+       jnz     .Lxts_dec_loop6
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,222,209
+       pand    %xmm8,%xmm9
+.byte  102,15,56,222,217
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,222,225
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+       movups  16(%rcx),%xmm1
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm10
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,222,208
+       pand    %xmm8,%xmm9
+.byte  102,15,56,222,216
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,222,224
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,222,232
+.byte  102,15,56,222,240
+.byte  102,15,56,222,248
+       movups  32(%rcx),%xmm0
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm11
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,222,209
+       pand    %xmm8,%xmm9
+.byte  102,15,56,222,217
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,222,225
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,222,233
+.byte  102,15,56,222,241
+.byte  102,15,56,222,249
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm12
+       paddq   %xmm15,%xmm15
+.byte  102,15,56,223,208
+       pand    %xmm8,%xmm9
+.byte  102,15,56,223,216
+       pcmpgtd %xmm15,%xmm14
+.byte  102,15,56,223,224
+       pxor    %xmm9,%xmm15
+.byte  102,15,56,223,232
+.byte  102,15,56,223,240
+.byte  102,15,56,223,248
+
+       pshufd  $19,%xmm14,%xmm9
+       pxor    %xmm14,%xmm14
+       movdqa  %xmm15,%xmm13
+       paddq   %xmm15,%xmm15
+       xorps   0(%rsp),%xmm2
+       pand    %xmm8,%xmm9
+       xorps   16(%rsp),%xmm3
+       pcmpgtd %xmm15,%xmm14
+       pxor    %xmm9,%xmm15
+
+       xorps   32(%rsp),%xmm4
+       movups  %xmm2,0(%rsi)
+       xorps   48(%rsp),%xmm5
+       movups  %xmm3,16(%rsi)
+       xorps   64(%rsp),%xmm6
+       movups  %xmm4,32(%rsi)
+       xorps   80(%rsp),%xmm7
+       movups  %xmm5,48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       leaq    96(%rsi),%rsi
+       subq    $96,%rdx
+       jnc     .Lxts_dec_grandloop
+
+       leal    3(%rax,%rax,1),%eax
+       movq    %r11,%rcx
+       movl    %eax,%r10d
+
+.Lxts_dec_short:
+       addq    $96,%rdx
+       jz      .Lxts_dec_done
+
+       cmpq    $32,%rdx
+       jb      .Lxts_dec_one
+       je      .Lxts_dec_two
+
+       cmpq    $64,%rdx
+       jb      .Lxts_dec_three
+       je      .Lxts_dec_four
+
+       pshufd  $19,%xmm14,%xmm9
+       movdqa  %xmm15,%xmm14
+       paddq   %xmm15,%xmm15
+       movdqu  (%rdi),%xmm2
+       pand    %xmm8,%xmm9
+       movdqu  16(%rdi),%xmm3
+       pxor    %xmm9,%xmm15
+
+       movdqu  32(%rdi),%xmm4
+       pxor    %xmm10,%xmm2
+       movdqu  48(%rdi),%xmm5
+       pxor    %xmm11,%xmm3
+       movdqu  64(%rdi),%xmm6
+       leaq    80(%rdi),%rdi
+       pxor    %xmm12,%xmm4
+       pxor    %xmm13,%xmm5
+       pxor    %xmm14,%xmm6
+
+       call    _aesni_decrypt6
+
+       xorps   %xmm10,%xmm2
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       movdqu  %xmm2,(%rsi)
+       xorps   %xmm13,%xmm5
+       movdqu  %xmm3,16(%rsi)
+       xorps   %xmm14,%xmm6
+       movdqu  %xmm4,32(%rsi)
+       pxor    %xmm14,%xmm14
+       movdqu  %xmm5,48(%rsi)
+       pcmpgtd %xmm15,%xmm14
+       movdqu  %xmm6,64(%rsi)
+       leaq    80(%rsi),%rsi
+       pshufd  $19,%xmm14,%xmm11
+       andq    $15,%r9
+       jz      .Lxts_dec_ret
+
+       movdqa  %xmm15,%xmm10
+       paddq   %xmm15,%xmm15
+       pand    %xmm8,%xmm11
+       pxor    %xmm15,%xmm11
+       jmp     .Lxts_dec_done2
+
+.align 16
+.Lxts_dec_one:
+       movups  (%rdi),%xmm2
+       leaq    16(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_12:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_12   
+.byte  102,15,56,223,209
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm11,%xmm10
+       movups  %xmm2,(%rsi)
+       movdqa  %xmm12,%xmm11
+       leaq    16(%rsi),%rsi
+       jmp     .Lxts_dec_done
+
+.align 16
+.Lxts_dec_two:
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       leaq    32(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       xorps   %xmm11,%xmm3
+
+       call    _aesni_decrypt3
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm12,%xmm10
+       xorps   %xmm11,%xmm3
+       movdqa  %xmm13,%xmm11
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       leaq    32(%rsi),%rsi
+       jmp     .Lxts_dec_done
+
+.align 16
+.Lxts_dec_three:
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       movups  32(%rdi),%xmm4
+       leaq    48(%rdi),%rdi
+       xorps   %xmm10,%xmm2
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+
+       call    _aesni_decrypt3
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm13,%xmm10
+       xorps   %xmm11,%xmm3
+       movdqa  %xmm15,%xmm11
+       xorps   %xmm12,%xmm4
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       leaq    48(%rsi),%rsi
+       jmp     .Lxts_dec_done
+
+.align 16
+.Lxts_dec_four:
+       pshufd  $19,%xmm14,%xmm9
+       movdqa  %xmm15,%xmm14
+       paddq   %xmm15,%xmm15
+       movups  (%rdi),%xmm2
+       pand    %xmm8,%xmm9
+       movups  16(%rdi),%xmm3
+       pxor    %xmm9,%xmm15
+
+       movups  32(%rdi),%xmm4
+       xorps   %xmm10,%xmm2
+       movups  48(%rdi),%xmm5
+       leaq    64(%rdi),%rdi
+       xorps   %xmm11,%xmm3
+       xorps   %xmm12,%xmm4
+       xorps   %xmm13,%xmm5
+
+       call    _aesni_decrypt4
+
+       xorps   %xmm10,%xmm2
+       movdqa  %xmm14,%xmm10
+       xorps   %xmm11,%xmm3
+       movdqa  %xmm15,%xmm11
+       xorps   %xmm12,%xmm4
+       movups  %xmm2,(%rsi)
+       xorps   %xmm13,%xmm5
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       leaq    64(%rsi),%rsi
+       jmp     .Lxts_dec_done
+
+.align 16
+.Lxts_dec_done:
+       andq    $15,%r9
+       jz      .Lxts_dec_ret
+.Lxts_dec_done2:
+       movq    %r9,%rdx
+       movq    %r11,%rcx
+       movl    %r10d,%eax
+
+       movups  (%rdi),%xmm2
+       xorps   %xmm11,%xmm2
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_13:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_13   
+.byte  102,15,56,223,209
+       xorps   %xmm11,%xmm2
+       movups  %xmm2,(%rsi)
+
+.Lxts_dec_steal:
+       movzbl  16(%rdi),%eax
+       movzbl  (%rsi),%ecx
+       leaq    1(%rdi),%rdi
+       movb    %al,(%rsi)
+       movb    %cl,16(%rsi)
+       leaq    1(%rsi),%rsi
+       subq    $1,%rdx
+       jnz     .Lxts_dec_steal
+
+       subq    %r9,%rsi
+       movq    %r11,%rcx
+       movl    %r10d,%eax
+
+       movups  (%rsi),%xmm2
+       xorps   %xmm10,%xmm2
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_14:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_14   
+.byte  102,15,56,223,209
+       xorps   %xmm10,%xmm2
+       movups  %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+       leaq    104(%rsp),%rsp
+.Lxts_dec_epilogue:
+       .byte   0xf3,0xc3
+.size  aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt
+.type  aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+       testq   %rdx,%rdx
+       jz      .Lcbc_ret
+
+       movl    240(%rcx),%r10d
+       movq    %rcx,%r11
+       testl   %r9d,%r9d
+       jz      .Lcbc_decrypt
+
+       movups  (%r8),%xmm2
+       movl    %r10d,%eax
+       cmpq    $16,%rdx
+       jb      .Lcbc_enc_tail
+       subq    $16,%rdx
+       jmp     .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+       movups  (%rdi),%xmm3
+       leaq    16(%rdi),%rdi
+
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       xorps   %xmm0,%xmm3
+       leaq    32(%rcx),%rcx
+       xorps   %xmm3,%xmm2
+.Loop_enc1_15:
+.byte  102,15,56,220,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_enc1_15   
+.byte  102,15,56,221,209
+       movl    %r10d,%eax
+       movq    %r11,%rcx
+       movups  %xmm2,0(%rsi)
+       leaq    16(%rsi),%rsi
+       subq    $16,%rdx
+       jnc     .Lcbc_enc_loop
+       addq    $16,%rdx
+       jnz     .Lcbc_enc_tail
+       movups  %xmm2,(%r8)
+       jmp     .Lcbc_ret
+
+.Lcbc_enc_tail:
+       movq    %rdx,%rcx
+       xchgq   %rdi,%rsi
+.long  0x9066A4F3      
+       movl    $16,%ecx
+       subq    %rdx,%rcx
+       xorl    %eax,%eax
+.long  0x9066AAF3      
+       leaq    -16(%rdi),%rdi
+       movl    %r10d,%eax
+       movq    %rdi,%rsi
+       movq    %r11,%rcx
+       xorq    %rdx,%rdx
+       jmp     .Lcbc_enc_loop  
+
+.align 16
+.Lcbc_decrypt:
+       movups  (%r8),%xmm9
+       movl    %r10d,%eax
+       cmpq    $112,%rdx
+       jbe     .Lcbc_dec_tail
+       shrl    $1,%r10d
+       subq    $112,%rdx
+       movl    %r10d,%eax
+       movaps  %xmm9,-24(%rsp)
+       jmp     .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+       movaps  %xmm0,-24(%rsp)
+       movups  %xmm9,(%rsi)
+       leaq    16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+       movups  (%rcx),%xmm0
+       movups  (%rdi),%xmm2
+       movups  16(%rdi),%xmm3
+       movups  16(%rcx),%xmm1
+
+       leaq    32(%rcx),%rcx
+       movdqu  32(%rdi),%xmm4
+       xorps   %xmm0,%xmm2
+       movdqu  48(%rdi),%xmm5
+       xorps   %xmm0,%xmm3
+       movdqu  64(%rdi),%xmm6
+.byte  102,15,56,222,209
+       pxor    %xmm0,%xmm4
+       movdqu  80(%rdi),%xmm7
+.byte  102,15,56,222,217
+       pxor    %xmm0,%xmm5
+       movdqu  96(%rdi),%xmm8
+.byte  102,15,56,222,225
+       pxor    %xmm0,%xmm6
+       movdqu  112(%rdi),%xmm9
+.byte  102,15,56,222,233
+       pxor    %xmm0,%xmm7
+       decl    %eax
+.byte  102,15,56,222,241
+       pxor    %xmm0,%xmm8
+.byte  102,15,56,222,249
+       pxor    %xmm0,%xmm9
+       movups  (%rcx),%xmm0
+.byte  102,68,15,56,222,193
+.byte  102,68,15,56,222,201
+       movups  16(%rcx),%xmm1
+
+       call    .Ldec_loop8_enter
+
+       movups  (%rdi),%xmm1
+       movups  16(%rdi),%xmm0
+       xorps   -24(%rsp),%xmm2
+       xorps   %xmm1,%xmm3
+       movups  32(%rdi),%xmm1
+       xorps   %xmm0,%xmm4
+       movups  48(%rdi),%xmm0
+       xorps   %xmm1,%xmm5
+       movups  64(%rdi),%xmm1
+       xorps   %xmm0,%xmm6
+       movups  80(%rdi),%xmm0
+       xorps   %xmm1,%xmm7
+       movups  96(%rdi),%xmm1
+       xorps   %xmm0,%xmm8
+       movups  112(%rdi),%xmm0
+       xorps   %xmm1,%xmm9
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movl    %r10d,%eax
+       movups  %xmm6,64(%rsi)
+       movq    %r11,%rcx
+       movups  %xmm7,80(%rsi)
+       leaq    128(%rdi),%rdi
+       movups  %xmm8,96(%rsi)
+       leaq    112(%rsi),%rsi
+       subq    $128,%rdx
+       ja      .Lcbc_dec_loop8
+
+       movaps  %xmm9,%xmm2
+       movaps  %xmm0,%xmm9
+       addq    $112,%rdx
+       jle     .Lcbc_dec_tail_collected
+       movups  %xmm2,(%rsi)
+       leal    1(%r10,%r10,1),%eax
+       leaq    16(%rsi),%rsi
+.Lcbc_dec_tail:
+       movups  (%rdi),%xmm2
+       movaps  %xmm2,%xmm8
+       cmpq    $16,%rdx
+       jbe     .Lcbc_dec_one
+
+       movups  16(%rdi),%xmm3
+       movaps  %xmm3,%xmm7
+       cmpq    $32,%rdx
+       jbe     .Lcbc_dec_two
+
+       movups  32(%rdi),%xmm4
+       movaps  %xmm4,%xmm6
+       cmpq    $48,%rdx
+       jbe     .Lcbc_dec_three
+
+       movups  48(%rdi),%xmm5
+       cmpq    $64,%rdx
+       jbe     .Lcbc_dec_four
+
+       movups  64(%rdi),%xmm6
+       cmpq    $80,%rdx
+       jbe     .Lcbc_dec_five
+
+       movups  80(%rdi),%xmm7
+       cmpq    $96,%rdx
+       jbe     .Lcbc_dec_six
+
+       movups  96(%rdi),%xmm8
+       movaps  %xmm9,-24(%rsp)
+       call    _aesni_decrypt8
+       movups  (%rdi),%xmm1
+       movups  16(%rdi),%xmm0
+       xorps   -24(%rsp),%xmm2
+       xorps   %xmm1,%xmm3
+       movups  32(%rdi),%xmm1
+       xorps   %xmm0,%xmm4
+       movups  48(%rdi),%xmm0
+       xorps   %xmm1,%xmm5
+       movups  64(%rdi),%xmm1
+       xorps   %xmm0,%xmm6
+       movups  80(%rdi),%xmm0
+       xorps   %xmm1,%xmm7
+       movups  96(%rdi),%xmm9
+       xorps   %xmm0,%xmm8
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       movups  %xmm7,80(%rsi)
+       leaq    96(%rsi),%rsi
+       movaps  %xmm8,%xmm2
+       subq    $112,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+       movups  (%rcx),%xmm0
+       movups  16(%rcx),%xmm1
+       leaq    32(%rcx),%rcx
+       xorps   %xmm0,%xmm2
+.Loop_dec1_16:
+.byte  102,15,56,222,209
+       decl    %eax
+       movups  (%rcx),%xmm1
+       leaq    16(%rcx),%rcx
+       jnz     .Loop_dec1_16   
+.byte  102,15,56,223,209
+       xorps   %xmm9,%xmm2
+       movaps  %xmm8,%xmm9
+       subq    $16,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+       xorps   %xmm4,%xmm4
+       call    _aesni_decrypt3
+       xorps   %xmm9,%xmm2
+       xorps   %xmm8,%xmm3
+       movups  %xmm2,(%rsi)
+       movaps  %xmm7,%xmm9
+       movaps  %xmm3,%xmm2
+       leaq    16(%rsi),%rsi
+       subq    $32,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+       call    _aesni_decrypt3
+       xorps   %xmm9,%xmm2
+       xorps   %xmm8,%xmm3
+       movups  %xmm2,(%rsi)
+       xorps   %xmm7,%xmm4
+       movups  %xmm3,16(%rsi)
+       movaps  %xmm6,%xmm9
+       movaps  %xmm4,%xmm2
+       leaq    32(%rsi),%rsi
+       subq    $48,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+       call    _aesni_decrypt4
+       xorps   %xmm9,%xmm2
+       movups  48(%rdi),%xmm9
+       xorps   %xmm8,%xmm3
+       movups  %xmm2,(%rsi)
+       xorps   %xmm7,%xmm4
+       movups  %xmm3,16(%rsi)
+       xorps   %xmm6,%xmm5
+       movups  %xmm4,32(%rsi)
+       movaps  %xmm5,%xmm2
+       leaq    48(%rsi),%rsi
+       subq    $64,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+       xorps   %xmm7,%xmm7
+       call    _aesni_decrypt6
+       movups  16(%rdi),%xmm1
+       movups  32(%rdi),%xmm0
+       xorps   %xmm9,%xmm2
+       xorps   %xmm8,%xmm3
+       xorps   %xmm1,%xmm4
+       movups  48(%rdi),%xmm1
+       xorps   %xmm0,%xmm5
+       movups  64(%rdi),%xmm9
+       xorps   %xmm1,%xmm6
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       leaq    64(%rsi),%rsi
+       movaps  %xmm6,%xmm2
+       subq    $80,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+       call    _aesni_decrypt6
+       movups  16(%rdi),%xmm1
+       movups  32(%rdi),%xmm0
+       xorps   %xmm9,%xmm2
+       xorps   %xmm8,%xmm3
+       xorps   %xmm1,%xmm4
+       movups  48(%rdi),%xmm1
+       xorps   %xmm0,%xmm5
+       movups  64(%rdi),%xmm0
+       xorps   %xmm1,%xmm6
+       movups  80(%rdi),%xmm9
+       xorps   %xmm0,%xmm7
+       movups  %xmm2,(%rsi)
+       movups  %xmm3,16(%rsi)
+       movups  %xmm4,32(%rsi)
+       movups  %xmm5,48(%rsi)
+       movups  %xmm6,64(%rsi)
+       leaq    80(%rsi),%rsi
+       movaps  %xmm7,%xmm2
+       subq    $96,%rdx
+       jmp     .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+       andq    $15,%rdx
+       movups  %xmm9,(%r8)
+       jnz     .Lcbc_dec_tail_partial
+       movups  %xmm2,(%rsi)
+       jmp     .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial:
+       movaps  %xmm2,-24(%rsp)
+       movq    $16,%rcx
+       movq    %rsi,%rdi
+       subq    %rdx,%rcx
+       leaq    -24(%rsp),%rsi
+.long  0x9066A4F3      
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+       .byte   0xf3,0xc3
+.size  aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key
+.type  aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.byte  0x48,0x83,0xEC,0x08     
+       call    __aesni_set_encrypt_key
+       shll    $4,%esi
+       testl   %eax,%eax
+       jnz     .Ldec_key_ret
+       leaq    16(%rdx,%rsi,1),%rdi
+
+       movups  (%rdx),%xmm0
+       movups  (%rdi),%xmm1
+       movups  %xmm0,(%rdi)
+       movups  %xmm1,(%rdx)
+       leaq    16(%rdx),%rdx
+       leaq    -16(%rdi),%rdi
+
+.Ldec_key_inverse:
+       movups  (%rdx),%xmm0
+       movups  (%rdi),%xmm1
+.byte  102,15,56,219,192
+.byte  102,15,56,219,201
+       leaq    16(%rdx),%rdx
+       leaq    -16(%rdi),%rdi
+       movups  %xmm0,16(%rdi)
+       movups  %xmm1,-16(%rdx)
+       cmpq    %rdx,%rdi
+       ja      .Ldec_key_inverse
+
+       movups  (%rdx),%xmm0
+.byte  102,15,56,219,192
+       movups  %xmm0,(%rdi)
+.Ldec_key_ret:
+       addq    $8,%rsp
+       .byte   0xf3,0xc3
+.LSEH_end_set_decrypt_key:
+.size  aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key
+.type  aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+.byte  0x48,0x83,0xEC,0x08     
+       movq    $-1,%rax
+       testq   %rdi,%rdi
+       jz      .Lenc_key_ret
+       testq   %rdx,%rdx
+       jz      .Lenc_key_ret
+
+       movups  (%rdi),%xmm0
+       xorps   %xmm4,%xmm4
+       leaq    16(%rdx),%rax
+       cmpl    $256,%esi
+       je      .L14rounds
+       cmpl    $192,%esi
+       je      .L12rounds
+       cmpl    $128,%esi
+       jne     .Lbad_keybits
+
+.L10rounds:
+       movl    $9,%esi
+       movups  %xmm0,(%rdx)
+.byte  102,15,58,223,200,1
+       call    .Lkey_expansion_128_cold
+.byte  102,15,58,223,200,2
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,4
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,8
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,16
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,32
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,64
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,128
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,27
+       call    .Lkey_expansion_128
+.byte  102,15,58,223,200,54
+       call    .Lkey_expansion_128
+       movups  %xmm0,(%rax)
+       movl    %esi,80(%rax)
+       xorl    %eax,%eax
+       jmp     .Lenc_key_ret
+
+.align 16
+.L12rounds:
+       movq    16(%rdi),%xmm2
+       movl    $11,%esi
+       movups  %xmm0,(%rdx)
+.byte  102,15,58,223,202,1
+       call    .Lkey_expansion_192a_cold
+.byte  102,15,58,223,202,2
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,4
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,8
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,16
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,32
+       call    .Lkey_expansion_192b
+.byte  102,15,58,223,202,64
+       call    .Lkey_expansion_192a
+.byte  102,15,58,223,202,128
+       call    .Lkey_expansion_192b
+       movups  %xmm0,(%rax)
+       movl    %esi,48(%rax)
+       xorq    %rax,%rax
+       jmp     .Lenc_key_ret
+
+.align 16
+.L14rounds:
+       movups  16(%rdi),%xmm2
+       movl    $13,%esi
+       leaq    16(%rax),%rax
+       movups  %xmm0,(%rdx)
+       movups  %xmm2,16(%rdx)
+.byte  102,15,58,223,202,1
+       call    .Lkey_expansion_256a_cold
+.byte  102,15,58,223,200,1
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,2
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,2
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,4
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,4
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,8
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,8
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,16
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,16
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,32
+       call    .Lkey_expansion_256a
+.byte  102,15,58,223,200,32
+       call    .Lkey_expansion_256b
+.byte  102,15,58,223,202,64
+       call    .Lkey_expansion_256a
+       movups  %xmm0,(%rax)
+       movl    %esi,16(%rax)
+       xorq    %rax,%rax
+       jmp     .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+       movq    $-2,%rax
+.Lenc_key_ret:
+       addq    $8,%rsp
+       .byte   0xf3,0xc3
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128:
+       movups  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_128_cold:
+       shufps  $16,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $255,%xmm1,%xmm1
+       xorps   %xmm1,%xmm0
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192a:
+       movups  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_192a_cold:
+       movaps  %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+       shufps  $16,%xmm0,%xmm4
+       movdqa  %xmm2,%xmm3
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       pslldq  $4,%xmm3
+       xorps   %xmm4,%xmm0
+       pshufd  $85,%xmm1,%xmm1
+       pxor    %xmm3,%xmm2
+       pxor    %xmm1,%xmm0
+       pshufd  $255,%xmm0,%xmm3
+       pxor    %xmm3,%xmm2
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192b:
+       movaps  %xmm0,%xmm3
+       shufps  $68,%xmm0,%xmm5
+       movups  %xmm5,(%rax)
+       shufps  $78,%xmm2,%xmm3
+       movups  %xmm3,16(%rax)
+       leaq    32(%rax),%rax
+       jmp     .Lkey_expansion_192b_warm
+
+.align 16
+.Lkey_expansion_256a:
+       movups  %xmm2,(%rax)
+       leaq    16(%rax),%rax
+.Lkey_expansion_256a_cold:
+       shufps  $16,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $140,%xmm0,%xmm4
+       xorps   %xmm4,%xmm0
+       shufps  $255,%xmm1,%xmm1
+       xorps   %xmm1,%xmm0
+       .byte   0xf3,0xc3
+
+.align 16
+.Lkey_expansion_256b:
+       movups  %xmm0,(%rax)
+       leaq    16(%rax),%rax
+
+       shufps  $16,%xmm2,%xmm4
+       xorps   %xmm4,%xmm2
+       shufps  $140,%xmm2,%xmm4
+       xorps   %xmm4,%xmm2
+       shufps  $170,%xmm1,%xmm1
+       xorps   %xmm1,%xmm2
+       .byte   0xf3,0xc3
+.size  aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size  __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64
+.Lbswap_mask:
+.byte  15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long  6,6,6,0
+.Lincrement64:
+.long  1,0,0,0
+.Lxts_magic:
+.long  0x87,0,1,0
+
+.byte  65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/asm/bsaes-x86_64.s b/secure/lib/libcrypto/asm/bsaes-x86_64.s
new file mode 100644 (file)
index 0000000..c50be0f
--- /dev/null
@@ -0,0 +1,2576 @@
+.text  
+
+
+
+
+.type  _bsaes_encrypt8,@function
+.align 64
+_bsaes_encrypt8:
+       leaq    .LBS0(%rip),%r11
+
+       movdqa  (%rax),%xmm8
+       leaq    16(%rax),%rax
+       movdqa  96(%r11),%xmm7
+       pxor    %xmm8,%xmm15
+       pxor    %xmm8,%xmm0
+.byte  102,68,15,56,0,255
+       pxor    %xmm8,%xmm1
+.byte  102,15,56,0,199
+       pxor    %xmm8,%xmm2
+.byte  102,15,56,0,207
+       pxor    %xmm8,%xmm3
+.byte  102,15,56,0,215
+       pxor    %xmm8,%xmm4
+.byte  102,15,56,0,223
+       pxor    %xmm8,%xmm5
+.byte  102,15,56,0,231
+       pxor    %xmm8,%xmm6
+.byte  102,15,56,0,239
+.byte  102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+       movdqa  0(%r11),%xmm7
+       movdqa  16(%r11),%xmm8
+       movdqa  %xmm5,%xmm9
+       psrlq   $1,%xmm5
+       movdqa  %xmm3,%xmm10
+       psrlq   $1,%xmm3
+       pxor    %xmm6,%xmm5
+       pxor    %xmm4,%xmm3
+       pand    %xmm7,%xmm5
+       pand    %xmm7,%xmm3
+       pxor    %xmm5,%xmm6
+       psllq   $1,%xmm5
+       pxor    %xmm3,%xmm4
+       psllq   $1,%xmm3
+       pxor    %xmm9,%xmm5
+       pxor    %xmm10,%xmm3
+       movdqa  %xmm1,%xmm9
+       psrlq   $1,%xmm1
+       movdqa  %xmm15,%xmm10
+       psrlq   $1,%xmm15
+       pxor    %xmm2,%xmm1
+       pxor    %xmm0,%xmm15
+       pand    %xmm7,%xmm1
+       pand    %xmm7,%xmm15
+       pxor    %xmm1,%xmm2
+       psllq   $1,%xmm1
+       pxor    %xmm15,%xmm0
+       psllq   $1,%xmm15
+       pxor    %xmm9,%xmm1
+       pxor    %xmm10,%xmm15
+       movdqa  32(%r11),%xmm7
+       movdqa  %xmm4,%xmm9
+       psrlq   $2,%xmm4
+       movdqa  %xmm3,%xmm10
+       psrlq   $2,%xmm3
+       pxor    %xmm6,%xmm4
+       pxor    %xmm5,%xmm3
+       pand    %xmm8,%xmm4
+       pand    %xmm8,%xmm3
+       pxor    %xmm4,%xmm6
+       psllq   $2,%xmm4
+       pxor    %xmm3,%xmm5
+       psllq   $2,%xmm3
+       pxor    %xmm9,%xmm4
+       pxor    %xmm10,%xmm3
+       movdqa  %xmm0,%xmm9
+       psrlq   $2,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $2,%xmm15
+       pxor    %xmm2,%xmm0
+       pxor    %xmm1,%xmm15
+       pand    %xmm8,%xmm0
+       pand    %xmm8,%xmm15
+       pxor    %xmm0,%xmm2
+       psllq   $2,%xmm0
+       pxor    %xmm15,%xmm1
+       psllq   $2,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       movdqa  %xmm2,%xmm9
+       psrlq   $4,%xmm2
+       movdqa  %xmm1,%xmm10
+       psrlq   $4,%xmm1
+       pxor    %xmm6,%xmm2
+       pxor    %xmm5,%xmm1
+       pand    %xmm7,%xmm2
+       pand    %xmm7,%xmm1
+       pxor    %xmm2,%xmm6
+       psllq   $4,%xmm2
+       pxor    %xmm1,%xmm5
+       psllq   $4,%xmm1
+       pxor    %xmm9,%xmm2
+       pxor    %xmm10,%xmm1
+       movdqa  %xmm0,%xmm9
+       psrlq   $4,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $4,%xmm15
+       pxor    %xmm4,%xmm0
+       pxor    %xmm3,%xmm15
+       pand    %xmm7,%xmm0
+       pand    %xmm7,%xmm15
+       pxor    %xmm0,%xmm4
+       psllq   $4,%xmm0
+       pxor    %xmm15,%xmm3
+       psllq   $4,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       decl    %r10d
+       jmp     .Lenc_sbox
+.align 16
+.Lenc_loop:
+       pxor    0(%rax),%xmm15
+       pxor    16(%rax),%xmm0
+.byte  102,68,15,56,0,255
+       pxor    32(%rax),%xmm1
+.byte  102,15,56,0,199
+       pxor    48(%rax),%xmm2
+.byte  102,15,56,0,207
+       pxor    64(%rax),%xmm3
+.byte  102,15,56,0,215
+       pxor    80(%rax),%xmm4
+.byte  102,15,56,0,223
+       pxor    96(%rax),%xmm5
+.byte  102,15,56,0,231
+       pxor    112(%rax),%xmm6
+.byte  102,15,56,0,239
+       leaq    128(%rax),%rax
+.byte  102,15,56,0,247
+.Lenc_sbox:
+       pxor    %xmm5,%xmm4
+       pxor    %xmm0,%xmm1
+       pxor    %xmm15,%xmm2
+       pxor    %xmm1,%xmm5
+       pxor    %xmm15,%xmm4
+
+       pxor    %xmm2,%xmm5
+       pxor    %xmm6,%xmm2
+       pxor    %xmm4,%xmm6
+       pxor    %xmm3,%xmm2
+       pxor    %xmm4,%xmm3
+       pxor    %xmm0,%xmm2
+
+       pxor    %xmm6,%xmm1
+       pxor    %xmm4,%xmm0
+       movdqa  %xmm6,%xmm10
+       movdqa  %xmm0,%xmm9
+       movdqa  %xmm4,%xmm8
+       movdqa  %xmm1,%xmm12
+       movdqa  %xmm5,%xmm11
+
+       pxor    %xmm3,%xmm10
+       pxor    %xmm1,%xmm9
+       pxor    %xmm2,%xmm8
+       movdqa  %xmm10,%xmm13
+       pxor    %xmm3,%xmm12
+       movdqa  %xmm9,%xmm7
+       pxor    %xmm15,%xmm11
+       movdqa  %xmm10,%xmm14
+
+       por     %xmm8,%xmm9
+       por     %xmm11,%xmm10
+       pxor    %xmm7,%xmm14
+       pand    %xmm11,%xmm13
+       pxor    %xmm8,%xmm11
+       pand    %xmm8,%xmm7
+       pand    %xmm11,%xmm14
+       movdqa  %xmm2,%xmm11
+       pxor    %xmm15,%xmm11
+       pand    %xmm11,%xmm12
+       pxor    %xmm12,%xmm10
+       pxor    %xmm12,%xmm9
+       movdqa  %xmm6,%xmm12
+       movdqa  %xmm4,%xmm11
+       pxor    %xmm0,%xmm12
+       pxor    %xmm5,%xmm11
+       movdqa  %xmm12,%xmm8
+       pand    %xmm11,%xmm12
+       por     %xmm11,%xmm8
+       pxor    %xmm12,%xmm7
+       pxor    %xmm14,%xmm10
+       pxor    %xmm13,%xmm9
+       pxor    %xmm14,%xmm8
+       movdqa  %xmm1,%xmm11
+       pxor    %xmm13,%xmm7
+       movdqa  %xmm3,%xmm12
+       pxor    %xmm13,%xmm8
+       movdqa  %xmm0,%xmm13
+       pand    %xmm2,%xmm11
+       movdqa  %xmm6,%xmm14
+       pand    %xmm15,%xmm12
+       pand    %xmm4,%xmm13
+       por     %xmm5,%xmm14
+       pxor    %xmm11,%xmm10
+       pxor    %xmm12,%xmm9
+       pxor    %xmm13,%xmm8
+       pxor    %xmm14,%xmm7
+
+
+
+
+
+       movdqa  %xmm10,%xmm11
+       pand    %xmm8,%xmm10
+       pxor    %xmm9,%xmm11
+
+       movdqa  %xmm7,%xmm13
+       movdqa  %xmm11,%xmm14
+       pxor    %xmm10,%xmm13
+       pand    %xmm13,%xmm14
+
+       movdqa  %xmm8,%xmm12
+       pxor    %xmm9,%xmm14
+       pxor    %xmm7,%xmm12
+
+       pxor    %xmm9,%xmm10
+
+       pand    %xmm10,%xmm12
+
+       movdqa  %xmm13,%xmm9
+       pxor    %xmm7,%xmm12
+
+       pxor    %xmm12,%xmm9
+       pxor    %xmm12,%xmm8
+
+       pand    %xmm7,%xmm9
+
+       pxor    %xmm9,%xmm13
+       pxor    %xmm9,%xmm8
+
+       pand    %xmm14,%xmm13
+
+       pxor    %xmm11,%xmm13
+       movdqa  %xmm5,%xmm11
+       movdqa  %xmm4,%xmm7
+       movdqa  %xmm14,%xmm9
+       pxor    %xmm13,%xmm9
+       pand    %xmm5,%xmm9
+       pxor    %xmm4,%xmm5
+       pand    %xmm14,%xmm4
+       pand    %xmm13,%xmm5
+       pxor    %xmm4,%xmm5
+       pxor    %xmm9,%xmm4
+       pxor    %xmm15,%xmm11
+       pxor    %xmm2,%xmm7
+       pxor    %xmm12,%xmm14
+       pxor    %xmm8,%xmm13
+       movdqa  %xmm14,%xmm10
+       movdqa  %xmm12,%xmm9
+       pxor    %xmm13,%xmm10
+       pxor    %xmm8,%xmm9
+       pand    %xmm11,%xmm10
+       pand    %xmm15,%xmm9
+       pxor    %xmm7,%xmm11
+       pxor    %xmm2,%xmm15
+       pand    %xmm14,%xmm7
+       pand    %xmm12,%xmm2
+       pand    %xmm13,%xmm11
+       pand    %xmm8,%xmm15
+       pxor    %xmm11,%xmm7
+       pxor    %xmm2,%xmm15
+       pxor    %xmm10,%xmm11
+       pxor    %xmm9,%xmm2
+       pxor    %xmm11,%xmm5
+       pxor    %xmm11,%xmm15
+       pxor    %xmm7,%xmm4
+       pxor    %xmm7,%xmm2
+
+       movdqa  %xmm6,%xmm11
+       movdqa  %xmm0,%xmm7
+       pxor    %xmm3,%xmm11
+       pxor    %xmm1,%xmm7
+       movdqa  %xmm14,%xmm10
+       movdqa  %xmm12,%xmm9
+       pxor    %xmm13,%xmm10
+       pxor    %xmm8,%xmm9
+       pand    %xmm11,%xmm10
+       pand    %xmm3,%xmm9
+       pxor    %xmm7,%xmm11
+       pxor    %xmm1,%xmm3
+       pand    %xmm14,%xmm7
+       pand    %xmm12,%xmm1
+       pand    %xmm13,%xmm11
+       pand    %xmm8,%xmm3
+       pxor    %xmm11,%xmm7
+       pxor    %xmm1,%xmm3
+       pxor    %xmm10,%xmm11
+       pxor    %xmm9,%xmm1
+       pxor    %xmm12,%xmm14
+       pxor    %xmm8,%xmm13
+       movdqa  %xmm14,%xmm10
+       pxor    %xmm13,%xmm10
+       pand    %xmm6,%xmm10
+       pxor    %xmm0,%xmm6
+       pand    %xmm14,%xmm0
+       pand    %xmm13,%xmm6
+       pxor    %xmm0,%xmm6
+       pxor    %xmm10,%xmm0
+       pxor    %xmm11,%xmm6
+       pxor    %xmm11,%xmm3
+       pxor    %xmm7,%xmm0
+       pxor    %xmm7,%xmm1
+       pxor    %xmm15,%xmm6
+       pxor    %xmm5,%xmm0
+       pxor    %xmm6,%xmm3
+       pxor    %xmm15,%xmm5
+       pxor    %xmm0,%xmm15
+
+       pxor    %xmm4,%xmm0
+       pxor    %xmm1,%xmm4
+       pxor    %xmm2,%xmm1
+       pxor    %xmm4,%xmm2
+       pxor    %xmm4,%xmm3
+
+       pxor    %xmm2,%xmm5
+       decl    %r10d
+       jl      .Lenc_done
+       pshufd  $147,%xmm15,%xmm7
+       pshufd  $147,%xmm0,%xmm8
+       pxor    %xmm7,%xmm15
+       pshufd  $147,%xmm3,%xmm9
+       pxor    %xmm8,%xmm0
+       pshufd  $147,%xmm5,%xmm10
+       pxor    %xmm9,%xmm3
+       pshufd  $147,%xmm2,%xmm11
+       pxor    %xmm10,%xmm5
+       pshufd  $147,%xmm6,%xmm12
+       pxor    %xmm11,%xmm2
+       pshufd  $147,%xmm1,%xmm13
+       pxor    %xmm12,%xmm6
+       pshufd  $147,%xmm4,%xmm14
+       pxor    %xmm13,%xmm1
+       pxor    %xmm14,%xmm4
+
+       pxor    %xmm15,%xmm8
+       pxor    %xmm4,%xmm7
+       pxor    %xmm4,%xmm8
+       pshufd  $78,%xmm15,%xmm15
+       pxor    %xmm0,%xmm9
+       pshufd  $78,%xmm0,%xmm0
+       pxor    %xmm2,%xmm12
+       pxor    %xmm7,%xmm15
+       pxor    %xmm6,%xmm13
+       pxor    %xmm8,%xmm0
+       pxor    %xmm5,%xmm11
+       pshufd  $78,%xmm2,%xmm7
+       pxor    %xmm1,%xmm14
+       pshufd  $78,%xmm6,%xmm8
+       pxor    %xmm3,%xmm10
+       pshufd  $78,%xmm5,%xmm2
+       pxor    %xmm4,%xmm10
+       pshufd  $78,%xmm4,%xmm6
+       pxor    %xmm4,%xmm11
+       pshufd  $78,%xmm1,%xmm5
+       pxor    %xmm11,%xmm7
+       pshufd  $78,%xmm3,%xmm1
+       pxor    %xmm12,%xmm8
+
+       pxor    %xmm10,%xmm2
+       pxor    %xmm14,%xmm6
+       pxor    %xmm13,%xmm5
+       movdqa  %xmm7,%xmm3
+       pxor    %xmm9,%xmm1
+       movdqa  %xmm8,%xmm4
+       movdqa  48(%r11),%xmm7
+       jnz     .Lenc_loop
+       movdqa  64(%r11),%xmm7
+       jmp     .Lenc_loop
+.align 16
+.Lenc_done:
+       movdqa  0(%r11),%xmm7
+       movdqa  16(%r11),%xmm8
+       movdqa  %xmm1,%xmm9
+       psrlq   $1,%xmm1
+       movdqa  %xmm2,%xmm10
+       psrlq   $1,%xmm2
+       pxor    %xmm4,%xmm1
+       pxor    %xmm6,%xmm2
+       pand    %xmm7,%xmm1
+       pand    %xmm7,%xmm2
+       pxor    %xmm1,%xmm4
+       psllq   $1,%xmm1
+       pxor    %xmm2,%xmm6
+       psllq   $1,%xmm2
+       pxor    %xmm9,%xmm1
+       pxor    %xmm10,%xmm2
+       movdqa  %xmm3,%xmm9
+       psrlq   $1,%xmm3
+       movdqa  %xmm15,%xmm10
+       psrlq   $1,%xmm15
+       pxor    %xmm5,%xmm3
+       pxor    %xmm0,%xmm15
+       pand    %xmm7,%xmm3
+       pand    %xmm7,%xmm15
+       pxor    %xmm3,%xmm5
+       psllq   $1,%xmm3
+       pxor    %xmm15,%xmm0
+       psllq   $1,%xmm15
+       pxor    %xmm9,%xmm3
+       pxor    %xmm10,%xmm15
+       movdqa  32(%r11),%xmm7
+       movdqa  %xmm6,%xmm9
+       psrlq   $2,%xmm6
+       movdqa  %xmm2,%xmm10
+       psrlq   $2,%xmm2
+       pxor    %xmm4,%xmm6
+       pxor    %xmm1,%xmm2
+       pand    %xmm8,%xmm6
+       pand    %xmm8,%xmm2
+       pxor    %xmm6,%xmm4
+       psllq   $2,%xmm6
+       pxor    %xmm2,%xmm1
+       psllq   $2,%xmm2
+       pxor    %xmm9,%xmm6
+       pxor    %xmm10,%xmm2
+       movdqa  %xmm0,%xmm9
+       psrlq   $2,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $2,%xmm15
+       pxor    %xmm5,%xmm0
+       pxor    %xmm3,%xmm15
+       pand    %xmm8,%xmm0
+       pand    %xmm8,%xmm15
+       pxor    %xmm0,%xmm5
+       psllq   $2,%xmm0
+       pxor    %xmm15,%xmm3
+       psllq   $2,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       movdqa  %xmm5,%xmm9
+       psrlq   $4,%xmm5
+       movdqa  %xmm3,%xmm10
+       psrlq   $4,%xmm3
+       pxor    %xmm4,%xmm5
+       pxor    %xmm1,%xmm3
+       pand    %xmm7,%xmm5
+       pand    %xmm7,%xmm3
+       pxor    %xmm5,%xmm4
+       psllq   $4,%xmm5
+       pxor    %xmm3,%xmm1
+       psllq   $4,%xmm3
+       pxor    %xmm9,%xmm5
+       pxor    %xmm10,%xmm3
+       movdqa  %xmm0,%xmm9
+       psrlq   $4,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $4,%xmm15
+       pxor    %xmm6,%xmm0
+       pxor    %xmm2,%xmm15
+       pand    %xmm7,%xmm0
+       pand    %xmm7,%xmm15
+       pxor    %xmm0,%xmm6
+       psllq   $4,%xmm0
+       pxor    %xmm15,%xmm2
+       psllq   $4,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       movdqa  (%rax),%xmm7
+       pxor    %xmm7,%xmm3
+       pxor    %xmm7,%xmm5
+       pxor    %xmm7,%xmm2
+       pxor    %xmm7,%xmm6
+       pxor    %xmm7,%xmm1
+       pxor    %xmm7,%xmm4
+       pxor    %xmm7,%xmm15
+       pxor    %xmm7,%xmm0
+       .byte   0xf3,0xc3
+.size  _bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type  _bsaes_decrypt8,@function
+.align 64
+_bsaes_decrypt8:
+       leaq    .LBS0(%rip),%r11
+
+       movdqa  (%rax),%xmm8
+       leaq    16(%rax),%rax
+       movdqa  -48(%r11),%xmm7
+       pxor    %xmm8,%xmm15
+       pxor    %xmm8,%xmm0
+.byte  102,68,15,56,0,255
+       pxor    %xmm8,%xmm1
+.byte  102,15,56,0,199
+       pxor    %xmm8,%xmm2
+.byte  102,15,56,0,207
+       pxor    %xmm8,%xmm3
+.byte  102,15,56,0,215
+       pxor    %xmm8,%xmm4
+.byte  102,15,56,0,223
+       pxor    %xmm8,%xmm5
+.byte  102,15,56,0,231
+       pxor    %xmm8,%xmm6
+.byte  102,15,56,0,239
+.byte  102,15,56,0,247
+       movdqa  0(%r11),%xmm7
+       movdqa  16(%r11),%xmm8
+       movdqa  %xmm5,%xmm9
+       psrlq   $1,%xmm5
+       movdqa  %xmm3,%xmm10
+       psrlq   $1,%xmm3
+       pxor    %xmm6,%xmm5
+       pxor    %xmm4,%xmm3
+       pand    %xmm7,%xmm5
+       pand    %xmm7,%xmm3
+       pxor    %xmm5,%xmm6
+       psllq   $1,%xmm5
+       pxor    %xmm3,%xmm4
+       psllq   $1,%xmm3
+       pxor    %xmm9,%xmm5
+       pxor    %xmm10,%xmm3
+       movdqa  %xmm1,%xmm9
+       psrlq   $1,%xmm1
+       movdqa  %xmm15,%xmm10
+       psrlq   $1,%xmm15
+       pxor    %xmm2,%xmm1
+       pxor    %xmm0,%xmm15
+       pand    %xmm7,%xmm1
+       pand    %xmm7,%xmm15
+       pxor    %xmm1,%xmm2
+       psllq   $1,%xmm1
+       pxor    %xmm15,%xmm0
+       psllq   $1,%xmm15
+       pxor    %xmm9,%xmm1
+       pxor    %xmm10,%xmm15
+       movdqa  32(%r11),%xmm7
+       movdqa  %xmm4,%xmm9
+       psrlq   $2,%xmm4
+       movdqa  %xmm3,%xmm10
+       psrlq   $2,%xmm3
+       pxor    %xmm6,%xmm4
+       pxor    %xmm5,%xmm3
+       pand    %xmm8,%xmm4
+       pand    %xmm8,%xmm3
+       pxor    %xmm4,%xmm6
+       psllq   $2,%xmm4
+       pxor    %xmm3,%xmm5
+       psllq   $2,%xmm3
+       pxor    %xmm9,%xmm4
+       pxor    %xmm10,%xmm3
+       movdqa  %xmm0,%xmm9
+       psrlq   $2,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $2,%xmm15
+       pxor    %xmm2,%xmm0
+       pxor    %xmm1,%xmm15
+       pand    %xmm8,%xmm0
+       pand    %xmm8,%xmm15
+       pxor    %xmm0,%xmm2
+       psllq   $2,%xmm0
+       pxor    %xmm15,%xmm1
+       psllq   $2,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       movdqa  %xmm2,%xmm9
+       psrlq   $4,%xmm2
+       movdqa  %xmm1,%xmm10
+       psrlq   $4,%xmm1
+       pxor    %xmm6,%xmm2
+       pxor    %xmm5,%xmm1
+       pand    %xmm7,%xmm2
+       pand    %xmm7,%xmm1
+       pxor    %xmm2,%xmm6
+       psllq   $4,%xmm2
+       pxor    %xmm1,%xmm5
+       psllq   $4,%xmm1
+       pxor    %xmm9,%xmm2
+       pxor    %xmm10,%xmm1
+       movdqa  %xmm0,%xmm9
+       psrlq   $4,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $4,%xmm15
+       pxor    %xmm4,%xmm0
+       pxor    %xmm3,%xmm15
+       pand    %xmm7,%xmm0
+       pand    %xmm7,%xmm15
+       pxor    %xmm0,%xmm4
+       psllq   $4,%xmm0
+       pxor    %xmm15,%xmm3
+       psllq   $4,%xmm15
+       pxor    %xmm9,%xmm0
+       pxor    %xmm10,%xmm15
+       decl    %r10d
+       jmp     .Ldec_sbox
+.align 16
+.Ldec_loop:
+       pxor    0(%rax),%xmm15
+       pxor    16(%rax),%xmm0
+.byte  102,68,15,56,0,255
+       pxor    32(%rax),%xmm1
+.byte  102,15,56,0,199
+       pxor    48(%rax),%xmm2
+.byte  102,15,56,0,207
+       pxor    64(%rax),%xmm3
+.byte  102,15,56,0,215
+       pxor    80(%rax),%xmm4
+.byte  102,15,56,0,223
+       pxor    96(%rax),%xmm5
+.byte  102,15,56,0,231
+       pxor    112(%rax),%xmm6
+.byte  102,15,56,0,239
+       leaq    128(%rax),%rax
+.byte  102,15,56,0,247
+.Ldec_sbox:
+       pxor    %xmm3,%xmm2
+
+       pxor    %xmm6,%xmm3
+       pxor    %xmm6,%xmm1
+       pxor    %xmm3,%xmm5
+       pxor    %xmm5,%xmm6
+       pxor    %xmm6,%xmm0
+
+       pxor    %xmm0,%xmm15
+       pxor    %xmm4,%xmm1
+       pxor    %xmm15,%xmm2
+       pxor    %xmm15,%xmm4
+       pxor    %xmm2,%xmm0
+       movdqa  %xmm2,%xmm10
+       movdqa  %xmm6,%xmm9
+       movdqa  %xmm0,%xmm8
+       movdqa  %xmm3,%xmm12
+       movdqa  %xmm4,%xmm11
+
+       pxor    %xmm15,%xmm10
+       pxor    %xmm3,%xmm9
+       pxor    %xmm5,%xmm8
+       movdqa  %xmm10,%xmm13
+       pxor    %xmm15,%xmm12
+       movdqa  %xmm9,%xmm7
+       pxor    %xmm1,%xmm11
+       movdqa  %xmm10,%xmm14
+
+       por     %xmm8,%xmm9
+       por     %xmm11,%xmm10
+       pxor    %xmm7,%xmm14
+       pand    %xmm11,%xmm13
+       pxor    %xmm8,%xmm11
+       pand    %xmm8,%xmm7
+       pand    %xmm11,%xmm14
+       movdqa  %xmm5,%xmm11
+       pxor    %xmm1,%xmm11
+       pand    %xmm11,%xmm12
+       pxor    %xmm12,%xmm10
+       pxor    %xmm12,%xmm9
+       movdqa  %xmm2,%xmm12
+       movdqa  %xmm0,%xmm11
+       pxor    %xmm6,%xmm12
+       pxor    %xmm4,%xmm11
+       movdqa  %xmm12,%xmm8
+       pand    %xmm11,%xmm12
+       por     %xmm11,%xmm8
+       pxor    %xmm12,%xmm7
+       pxor    %xmm14,%xmm10
+       pxor    %xmm13,%xmm9
+       pxor    %xmm14,%xmm8
+       movdqa  %xmm3,%xmm11
+       pxor    %xmm13,%xmm7
+       movdqa  %xmm15,%xmm12
+       pxor    %xmm13,%xmm8
+       movdqa  %xmm6,%xmm13
+       pand    %xmm5,%xmm11
+       movdqa  %xmm2,%xmm14
+       pand    %xmm1,%xmm12
+       pand    %xmm0,%xmm13
+       por     %xmm4,%xmm14
+       pxor    %xmm11,%xmm10
+       pxor    %xmm12,%xmm9
+       pxor    %xmm13,%xmm8
+       pxor    %xmm14,%xmm7
+
+
+
+
+
+       movdqa  %xmm10,%xmm11
+       pand    %xmm8,%xmm10
+       pxor    %xmm9,%xmm11
+
+       movdqa  %xmm7,%xmm13
+       movdqa  %xmm11,%xmm14
+       pxor    %xmm10,%xmm13
+       pand    %xmm13,%xmm14
+
+       movdqa  %xmm8,%xmm12
+       pxor    %xmm9,%xmm14
+       pxor    %xmm7,%xmm12
+
+       pxor    %xmm9,%xmm10
+
+       pand    %xmm10,%xmm12
+
+       movdqa  %xmm13,%xmm9
+       pxor    %xmm7,%xmm12
+
+       pxor    %xmm12,%xmm9
+       pxor    %xmm12,%xmm8
+
+       pand    %xmm7,%xmm9
+
+       pxor    %xmm9,%xmm13
+       pxor    %xmm9,%xmm8
+
+       pand    %xmm14,%xmm13
+
+       pxor    %xmm11,%xmm13
+       movdqa  %xmm4,%xmm11
+       movdqa  %xmm0,%xmm7
+       movdqa  %xmm14,%xmm9
+       pxor    %xmm13,%xmm9
+       pand    %xmm4,%xmm9
+       pxor    %xmm0,%xmm4
+       pand    %xmm14,%xmm0
+       pand    %xmm13,%xmm4
+       pxor    %xmm0,%xmm4
+       pxor    %xmm9,%xmm0
+       pxor    %xmm1,%xmm11
+       pxor    %xmm5,%xmm7
+       pxor    %xmm12,%xmm14
+       pxor    %xmm8,%xmm13
+       movdqa  %xmm14,%xmm10
+       movdqa  %xmm12,%xmm9
+       pxor    %xmm13,%xmm10
+       pxor    %xmm8,%xmm9
+       pand    %xmm11,%xmm10
+       pand    %xmm1,%xmm9
+       pxor    %xmm7,%xmm11
+       pxor    %xmm5,%xmm1
+       pand    %xmm14,%xmm7
+       pand    %xmm12,%xmm5
+       pand    %xmm13,%xmm11
+       pand    %xmm8,%xmm1
+       pxor    %xmm11,%xmm7
+       pxor    %xmm5,%xmm1
+       pxor    %xmm10,%xmm11
+       pxor    %xmm9,%xmm5
+       pxor    %xmm11,%xmm4
+       pxor    %xmm11,%xmm1
+       pxor    %xmm7,%xmm0
+       pxor    %xmm7,%xmm5
+
+       movdqa  %xmm2,%xmm11
+       movdqa  %xmm6,%xmm7
+       pxor    %xmm15,%xmm11
+       pxor    %xmm3,%xmm7
+       movdqa  %xmm14,%xmm10
+       movdqa  %xmm12,%xmm9
+       pxor    %xmm13,%xmm10
+       pxor    %xmm8,%xmm9
+       pand    %xmm11,%xmm10
+       pand    %xmm15,%xmm9
+       pxor    %xmm7,%xmm11
+       pxor    %xmm3,%xmm15
+       pand    %xmm14,%xmm7
+       pand    %xmm12,%xmm3
+       pand    %xmm13,%xmm11
+       pand    %xmm8,%xmm15
+       pxor    %xmm11,%xmm7
+       pxor    %xmm3,%xmm15
+       pxor    %xmm10,%xmm11
+       pxor    %xmm9,%xmm3
+       pxor    %xmm12,%xmm14
+       pxor    %xmm8,%xmm13
+       movdqa  %xmm14,%xmm10
+       pxor    %xmm13,%xmm10
+       pand    %xmm2,%xmm10
+       pxor    %xmm6,%xmm2
+       pand    %xmm14,%xmm6
+       pand    %xmm13,%xmm2
+       pxor    %xmm6,%xmm2
+       pxor    %xmm10,%xmm6
+       pxor    %xmm11,%xmm2
+       pxor    %xmm11,%xmm15
+       pxor    %xmm7,%xmm6
+       pxor    %xmm7,%xmm3
+       pxor    %xmm6,%xmm0
+       pxor    %xmm4,%xmm5
+
+       pxor    %xmm0,%xmm3
+       pxor    %xmm6,%xmm1
+       pxor    %xmm6,%xmm4
+       pxor    %xmm1,%xmm3
+       pxor    %xmm15,%xmm6
+       pxor    %xmm4,%xmm3
+       pxor    %xmm5,%xmm2
+       pxor    %xmm0,%xmm5
+       pxor    %xmm3,%xmm2
+
+       pxor    %xmm15,%xmm3
+       pxor    %xmm2,%xmm6
+       decl    %r10d
+       jl      .Ldec_done
+
+       pshufd  $147,%xmm4,%xmm14
+       movdqa  %xmm5,%xmm9
+       pxor    %xmm6,%xmm4
+       pxor    %xmm6,%xmm5
+       pshufd  $147,%xmm15,%xmm7
+       movdqa  %xmm6,%xmm12
+       pxor    %xmm15,%xmm6
+       pxor    %xmm0,%xmm15
+       pshufd  $147,%xmm0,%xmm8
+       pxor    %xmm5,%xmm0
+       pxor    %xmm2,%xmm15
+       pxor    %xmm3,%xmm0
+       pshufd  $147,%xmm3,%xmm10
+       pxor    %xmm15,%xmm5
+       pxor    %xmm4,%xmm3
+       pxor    %xmm2,%xmm4
+       pshufd  $147,%xmm2,%xmm13
+       movdqa  %xmm1,%xmm11
+       pxor    %xmm1,%xmm2
+       pxor    %xmm3,%xmm1
+       pxor    %xmm4,%xmm3
+       pxor    %xmm12,%xmm2
+       pxor    %xmm9,%xmm3
+       pxor    %xmm11,%xmm3
+       pshufd  $147,%xmm12,%xmm12
+
+       pxor    %xmm4,%xmm6
+       pxor    %xmm7,%xmm4
+       pxor    %xmm8,%xmm6
+       pshufd  $147,%xmm9,%xmm9
+       pxor    %xmm12,%xmm4
+       pxor    %xmm13,%xmm6
+       pxor    %xmm14,%xmm4
+       pshufd  $147,%xmm11,%xmm11
+       pxor    %xmm13,%xmm14
+       pxor    %xmm4,%xmm6
+
+       pxor    %xmm7,%xmm5
+       pshufd  $147,%xmm7,%xmm7
+       pxor    %xmm8,%xmm15
+       pxor    %xmm8,%xmm0
+       pxor    %xmm9,%xmm15
+       pshufd  $147,%xmm8,%xmm8
+       pxor    %xmm9,%xmm5
+       pxor    %xmm9,%xmm3
+       pxor    %xmm14,%xmm15
+       pshufd  $147,%xmm9,%xmm9
+       pxor    %xmm10,%xmm5
+       pxor    %xmm10,%xmm1
+       pxor    %xmm10,%xmm0
+       pshufd  $147,%xmm10,%xmm10
+       pxor    %xmm11,%xmm2
+       pxor    %xmm11,%xmm3
+       pxor    %xmm14,%xmm2
+       pxor    %xmm12,%xmm5
+       pxor    %xmm11,%xmm0
+       pxor    %xmm12,%xmm14
+
+       pxor    %xmm14,%xmm3
+       pshufd  $147,%xmm11,%xmm11
+       pxor    %xmm14,%xmm1
+       pxor    %xmm14,%xmm0
+
+       pxor    %xmm12,%xmm14
+       pshufd  $147,%xmm12,%xmm12
+       pxor    %xmm13,%xmm14
+
+
+       pxor    %xmm2,%xmm0
+       pxor    %xmm11,%xmm2
+       pshufd  $147,%xmm13,%xmm13
+       pxor    %xmm7,%xmm15
+       pxor    %xmm12,%xmm2
+       pxor    %xmm9,%xmm15
+       pshufd  $147,%xmm14,%xmm14
+
+       pxor    %xmm6,%xmm5
+       pxor    %xmm8,%xmm6
+       pxor    %xmm7,%xmm4
+       pxor    %xmm7,%xmm5
+       pxor    %xmm12,%xmm6
+       pxor    %xmm12,%xmm4
+       pxor    %xmm14,%xmm6
+       pshufd  $147,%xmm7,%xmm7
+       pxor    %xmm13,%xmm4
+       pxor    %xmm6,%xmm5
+       pxor    %xmm8,%xmm0
+       pshufd  $147,%xmm8,%xmm8
+
+       pxor    %xmm14,%xmm2
+       pxor    %xmm9,%xmm0
+       pxor    %xmm9,%xmm3
+       pshufd  $147,%xmm9,%xmm9
+       pxor    %xmm13,%xmm15
+       pxor    %xmm10,%xmm13
+       pxor    %xmm2,%xmm0
+       pxor    %xmm13,%xmm5
+
+       pxor    %xmm13,%xmm1
+       pxor    %xmm12,%xmm3
+       pxor    %xmm11,%xmm1
+       pshufd  $147,%xmm11,%xmm11
+       pxor    %xmm13,%xmm3
+       pxor    %xmm14,%xmm1
+       pxor    %xmm10,%xmm13
+
+       pshufd  $147,%xmm12,%xmm12
+       pshufd  $147,%xmm13,%xmm13
+       pshufd  $147,%xmm14,%xmm14
+       pshufd  $147,%xmm10,%xmm10
+
+
+       pxor    %xmm6,%xmm0
+       pxor    %xmm6,%xmm8
+       pxor    %xmm12,%xmm7
+       pxor    %xmm12,%xmm8
+       pxor    %xmm7,%xmm5
+       pxor    %xmm4,%xmm7
+       pxor    %xmm13,%xmm8
+       pxor    %xmm14,%xmm13
+       pxor    %xmm8,%xmm0
+       pxor    %xmm11,%xmm2
+       pxor    %xmm0,%xmm11
+       pxor    %xmm10,%xmm1
+       pxor    %xmm5,%xmm10
+       pxor    %xmm9,%xmm3
+       pxor    %xmm15,%xmm9
+       pxor    %xmm14,%xmm10
+       pxor    %xmm3,%xmm12
+       pxor    %xmm13,%xmm9
+       pxor    %xmm13,%xmm12
+       pxor    %xmm1,%xmm13
+       pxor    %xmm2,%xmm14
+
+       movdqa  %xmm7,%xmm15
+       movdqa  %xmm8,%xmm0
+       movdqa  %xmm9,%xmm1
+       movdqa  %xmm10,%xmm2
+       movdqa  %xmm11,%xmm3
+       movdqa  %xmm12,%xmm4
+       movdqa  %xmm13,%xmm5
+       movdqa  %xmm14,%xmm6
+       movdqa  -16(%r11),%xmm7
+       jnz     .Ldec_loop
+       movdqa  -32(%r11),%xmm7
+       jmp     .Ldec_loop
+.align 16
+.Ldec_done:
+       movdqa  0(%r11),%xmm7
+       movdqa  16(%r11),%xmm8
+       movdqa  %xmm2,%xmm9
+       psrlq   $1,%xmm2
+       movdqa  %xmm1,%xmm10
+       psrlq   $1,%xmm1
+       pxor    %xmm4,%xmm2
+       pxor    %xmm6,%xmm1
+       pand    %xmm7,%xmm2
+       pand    %xmm7,%xmm1
+       pxor    %xmm2,%xmm4
+       psllq   $1,%xmm2
+       pxor    %xmm1,%xmm6
+       psllq   $1,%xmm1
+       pxor    %xmm9,%xmm2
+       pxor    %xmm10,%xmm1
+       movdqa  %xmm5,%xmm9
+       psrlq   $1,%xmm5
+       movdqa  %xmm15,%xmm10
+       psrlq   $1,%xmm15
+       pxor    %xmm3,%xmm5
+       pxor    %xmm0,%xmm15
+       pand    %xmm7,%xmm5
+       pand    %xmm7,%xmm15
+       pxor    %xmm5,%xmm3
+       psllq   $1,%xmm5
+       pxor    %xmm15,%xmm0
+       psllq   $1,%xmm15
+       pxor    %xmm9,%xmm5
+       pxor    %xmm10,%xmm15
+       movdqa  32(%r11),%xmm7
+       movdqa  %xmm6,%xmm9
+       psrlq   $2,%xmm6
+       movdqa  %xmm1,%xmm10
+       psrlq   $2,%xmm1
+       pxor    %xmm4,%xmm6
+       pxor    %xmm2,%xmm1
+       pand    %xmm8,%xmm6
+       pand    %xmm8,%xmm1
+       pxor    %xmm6,%xmm4
+       psllq   $2,%xmm6
+       pxor    %xmm1,%xmm2
+       psllq   $2,%xmm1
+       pxor    %xmm9,%xmm6
+       pxor    %xmm10,%xmm1
+       movdqa  %xmm0,%xmm9
+       psrlq   $2,%xmm0
+       movdqa  %xmm15,%xmm10
+       psrlq   $2,%xmm15
+       pxor    %xmm3,%xmm0
+       pxor    %xmm5,%xmm15
+       pand    %xmm8,%xmm0
+       pand    %xmm8,%xmm15
+       pxor    %xmm0,%xmm3
+       psllq   $2,%xmm0