TO_REMOVE+=/usr/bin/kzip
TO_REMOVE+=/usr/share/man/cat8/kzip.8.gz
TO_REMOVE+=/usr/share/man/man8/kzip.8.gz
+TO_REMOVE+=/usr/include/openssl/e_os.h
+TO_REMOVE+=/usr/include/openssl/eng_int.h
+TO_REMOVE+=/usr/include/openssl/ui_locl.h
.if ${MACHINE_ARCH} == "x86_64"
TO_REMOVE+=/usr/libdata/stallion/2681.sys
int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles);
int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles);
-SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl);
SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
.include "Makefile.inc"
# base sources
-SRCS= cpt_err.c cryptlib.c cversion.c ebcdic.c ex_data.c \
- mem.c mem_dbg.c o_dir.c o_time.c uid.c
+SRCS= cryptlib.c mem.c mem_dbg.c cversion.c ex_data.c cpt_err.c ebcdic.c \
+ uid.c o_time.c o_dir.c o_fips.c o_init.c fips_ers.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= x86cpuid.s
.elif ${MACHINE_ARCH} == "x86_64"
SRCS+= x86_64cpuid.s
.endif
-INCS= ../e_os.h ../e_os2.h crypto.h ebcdic.h opensslv.h ossl_typ.h symhacks.h
+INCS= ../e_os2.h
+INCS+= crypto.h opensslv.h opensslconf.h ebcdic.h symhacks.h ossl_typ.h
+SRCS+= buildinf.h
+INCSDIR= ${INCLUDEDIR}/openssl
+
+CLEANFILES+= buildinf.h opensslconf.h
+
+buildinf.h:
+ ( echo "#ifndef MK1MF_BUILD"; \
+ echo " /* auto-generated by crypto/Makefile.ssl for crypto/cversion.c */"; \
+ echo " #define CFLAGS \"$(CC)\""; \
+ echo " #define PLATFORM \"`uname -s`-`uname -m`\""; \
+ echo " #define DATE \"`LC_ALL=C date`\""; \
+ echo "#endif" ) > ${.TARGET}
+
+opensslconf.h: opensslconf-${MACHINE_ARCH}.h
+.if defined(WANT_IDEA)
+ sed '/^# define OPENSSL_NO_IDEA$$/d;/^# define NO_IDEA$$/d' ${.ALLSRC} > ${.TARGET}
+.else
+ cp ${.ALLSRC} ${.TARGET}
+.endif
+
# aes
-SRCS+= aes_cfb.c aes_ctr.c aes_ecb.c aes_ige.c \
- aes_misc.c aes_ofb.c aes_wrap.c
+SRCS+= aes_misc.c aes_ecb.c aes_cfb.c aes_ofb.c aes_ctr.c aes_ige.c aes_wrap.c
.if ${MACHINE_ARCH} == "i386"
-SRCS+= aes-586.s
+SRCS+= aes-586.s vpaes-x86.s aesni-x86.s
.elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= aes-x86_64.s
+SRCS+= aes-x86_64.s vpaes-x86_64.s bsaes-x86_64.s aesni-x86_64.s aesni-sha1-x86_64.s
.endif
-INCS+= aes.h aes_locl.h
+INCS+= aes.h
# asn1
-SRCS+= a_bitstr.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c \
- a_dup.c a_enum.c a_gentm.c a_i2d_fp.c a_int.c \
- a_mbstr.c a_object.c a_octet.c a_print.c \
- a_set.c a_sign.c a_strex.c a_strnid.c a_time.c a_type.c \
- a_utctm.c a_utf8.c a_verify.c ameth_lib.c asn1_err.c asn1_lib.c \
- asn1_gen.c asn1_par.c asn_mime.c \
- asn_moid.c asn_pack.c bio_asn1.c bio_ndef.c d2i_pr.c d2i_pu.c \
- evp_asn1.c f_enum.c f_int.c f_string.c i2d_pr.c i2d_pu.c \
- n_pkey.c nsseq.c p5_pbe.c p5_pbev2.c p8_pkey.c t_bitst.c \
- t_crl.c t_pkey.c t_req.c t_spki.c t_x509.c t_x509a.c \
- tasn_dec.c tasn_enc.c tasn_fre.c tasn_new.c tasn_prn.c tasn_typ.c \
- tasn_utl.c x_algor.c x_attrib.c x_bignum.c x_crl.c \
- x_exten.c x_info.c x_long.c x_name.c x_nx509.c x_pkey.c x_pubkey.c \
- x_req.c x_sig.c x_spki.c x_val.c x_x509.c x_x509a.c
+SRCS+= a_object.c a_bitstr.c a_utctm.c a_gentm.c a_time.c a_int.c a_octet.c \
+ a_print.c a_type.c a_set.c a_dup.c a_d2i_fp.c a_i2d_fp.c \
+ a_enum.c a_utf8.c a_sign.c a_digest.c a_verify.c a_mbstr.c a_strex.c \
+ x_algor.c x_val.c x_pubkey.c x_sig.c x_req.c x_attrib.c x_bignum.c \
+ x_long.c x_name.c x_x509.c x_x509a.c x_crl.c x_info.c x_spki.c nsseq.c \
+ x_nx509.c d2i_pu.c d2i_pr.c i2d_pu.c i2d_pr.c \
+ t_req.c t_x509.c t_x509a.c t_crl.c t_pkey.c t_spki.c t_bitst.c \
+ tasn_new.c tasn_fre.c tasn_enc.c tasn_dec.c tasn_utl.c tasn_typ.c \
+ tasn_prn.c ameth_lib.c \
+ f_int.c f_string.c n_pkey.c \
+ f_enum.c x_pkey.c a_bool.c x_exten.c bio_asn1.c bio_ndef.c asn_mime.c \
+ asn1_gen.c asn1_par.c asn1_lib.c asn1_err.c a_bytes.c a_strnid.c \
+ evp_asn1.c asn_pack.c p5_pbe.c p5_pbev2.c p8_pkey.c asn_moid.c
INCS+= asn1.h asn1_mac.h asn1t.h
# bf
-SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
+SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= bf-586.s
.elif ${MACHINE_ARCH} == "x86_64"
INCS+= blowfish.h
# bio
-SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_lbuf.c bf_nbio.c \
- bf_null.c bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c \
- bss_conn.c bss_dgram.c bss_fd.c bss_file.c bss_log.c bss_mem.c \
- bss_null.c bss_sock.c
+SRCS+= bio_lib.c bio_cb.c bio_err.c \
+ bss_mem.c bss_null.c bss_fd.c \
+ bss_file.c bss_sock.c bss_conn.c \
+ bf_null.c bf_buff.c b_print.c b_dump.c \
+ b_sock.c bss_acpt.c bf_nbio.c bss_log.c bss_bio.c \
+ bss_dgram.c
INCS+= bio.h
# bn
-SRCS+= bn_add.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c \
- bn_div.c bn_err.c bn_exp.c \
- bn_exp2.c bn_gcd.c bn_gf2m.c bn_kron.c bn_lib.c bn_mod.c bn_mont.c \
- bn_mpi.c bn_mul.c bn_nist.c \
- bn_prime.c bn_print.c bn_rand.c bn_recp.c \
- bn_shift.c bn_sqr.c bn_sqrt.c bn_word.c
+SRCS+= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
+ bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+ bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
+ bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
+ bn_depr.c bn_const.c bn_x931p.c
.if ${MACHINE_ARCH} == "i386"
-SRCS+= bn-586.s co-586.s x86-mont.s
+SRCS+= bn-586.s co-586.s x86-mont.s x86-gf2m.s
.elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= x86_64-gcc.c x86_64-mont.s
+SRCS+= x86_64-gcc.c x86_64-mont.s x86_64-mont5.s x86_64-gf2m.s modexp512-x86_64.s
.endif
INCS+= bn.h
# buffer
-SRCS+= buf_err.c buffer.c
+SRCS+= buffer.c buf_str.c buf_err.c
INCS+= buffer.h
# camellia
-SRCS+= cmll_cfb.c \
- cmll_ctr.c cmll_ecb.c cmll_ofb.c
+SRCS+= cmll_ecb.c cmll_ofb.c cmll_cfb.c cmll_ctr.c cmll_utl.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= cmll-x86.s
.elif ${MACHINE_ARCH} == "x86_64"
INCS+= camellia.h
# cast
-SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c
+SRCS+= c_skey.c c_ecb.c c_enc.c c_cfb64.c c_ofb64.c
INCS+= cast.h
+# cmac
+SRCS+= cmac.c cm_ameth.c cm_pmeth.c
+INCS+= cmac.h
+
# cms
-SRCS+= cms_asn1.c cms_att.c cms_cd.c cms_dd.c cms_enc.c cms_env.c cms_err.c \
- cms_ess.c cms_io.c cms_lib.c cms_sd.c cms_smime.c
+SRCS+= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
+ cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \
+ cms_pwri.c
INCS+= cms.h
# comp
-SRCS+= c_rle.c c_zlib.c comp_err.c comp_lib.c
+SRCS+= comp_lib.c comp_err.c \
+ c_rle.c c_zlib.c
INCS+= comp.h
# conf
-SRCS+= conf_api.c conf_def.c conf_err.c conf_lib.c conf_mall.c conf_mod.c conf_sap.c
+SRCS+= conf_err.c conf_lib.c conf_api.c conf_def.c conf_mod.c \
+ conf_mall.c conf_sap.c
INCS+= conf.h conf_api.h
# des
-SRCS+= cbc3_enc.c cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
- des_old.c des_old2.c \
- ecb3_enc.c ecb_enc.c ede_cbcm_enc.c \
- enc_read.c enc_writ.c fcrypt.c ofb64ede.c ofb64enc.c \
- ofb_enc.c pcbc_enc.c qud_cksm.c rand_key.c read2pwd.c \
- rpc_enc.c set_key.c str2key.c xcbc_enc.c
+SRCS+= set_key.c ecb_enc.c cbc_enc.c \
+ ecb3_enc.c cfb64enc.c cfb64ede.c cfb_enc.c ofb64ede.c \
+ enc_read.c enc_writ.c ofb64enc.c \
+ ofb_enc.c str2key.c pcbc_enc.c qud_cksm.c rand_key.c \
+ fcrypt.c xcbc_enc.c rpc_enc.c cbc_cksm.c \
+ ede_cbcm_enc.c des_old.c des_old2.c read2pwd.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= des-586.s crypt586.s
.elif ${MACHINE_ARCH} == "x86_64"
INCS+= des.h des_old.h
# dh
-SRCS+= dh_ameth.c dh_asn1.c dh_check.c dh_depr.c \
- dh_err.c dh_gen.c dh_key.c dh_lib.c dh_pmeth.c dh_prn.c
+SRCS+= dh_asn1.c dh_gen.c dh_key.c dh_lib.c dh_check.c dh_err.c dh_depr.c \
+ dh_ameth.c dh_pmeth.c dh_prn.c
INCS+= dh.h
# dsa
-SRCS+= dsa_ameth.c dsa_asn1.c dsa_depr.c dsa_err.c \
- dsa_gen.c dsa_key.c dsa_lib.c \
- dsa_ossl.c dsa_pmeth.c dsa_prn.c dsa_sign.c dsa_vrf.c
+SRCS+= dsa_gen.c dsa_key.c dsa_lib.c dsa_asn1.c dsa_vrf.c dsa_sign.c \
+ dsa_err.c dsa_ossl.c dsa_depr.c dsa_ameth.c dsa_pmeth.c dsa_prn.c
INCS+= dsa.h
# dso
INCS+= dso.h
# ec
-SRCS+= ec_ameth.c ec_asn1.c ec_check.c ec_curve.c \
- ec_cvt.c ec_err.c ec_key.c ec_lib.c \
- ec_mult.c ec_pmeth.c ec_print.c ec2_smpl.c ec2_mult.c \
- eck_prn.c ecp_mont.c ecp_nist.c \
- ecp_smpl.c
+SRCS+= ec_lib.c ecp_smpl.c ecp_mont.c ecp_nist.c ec_cvt.c ec_mult.c \
+ ec_err.c ec_curve.c ec_check.c ec_print.c ec_asn1.c ec_key.c \
+ ec2_smpl.c ec2_mult.c ec_ameth.c ec_pmeth.c eck_prn.c \
+ ecp_nistp224.c ecp_nistp256.c ecp_nistp521.c ecp_nistputil.c \
+ ecp_oct.c ec2_oct.c ec_oct.c
INCS+= ec.h
# ecdh
-SRCS+= ech_err.c ech_key.c ech_lib.c ech_ossl.c
+SRCS+= ech_lib.c ech_ossl.c ech_key.c ech_err.c
INCS+= ecdh.h
# ecdsa
-SRCS+= ecs_asn1.c ecs_err.c ecs_lib.c ecs_ossl.c ecs_sign.c ecs_vrf.c
+SRCS+= ecs_lib.c ecs_asn1.c ecs_ossl.c ecs_sign.c ecs_vrf.c ecs_err.c
INCS+= ecdsa.h
# engine
CFLAGS+=-DHAVE_CRYPTODEV
-SRCS+= eng_all.c eng_cnf.c eng_cryptodev.c eng_ctrl.c eng_dyn.c eng_err.c \
- eng_fat.c eng_init.c eng_lib.c eng_list.c eng_openssl.c \
- eng_pkey.c eng_table.c tb_asnmth.c tb_cipher.c tb_dh.c \
- tb_digest.c tb_dsa.c tb_ecdh.c tb_ecdsa.c \
- tb_pkmeth.c tb_rand.c tb_rsa.c tb_store.c
-INCS+= eng_int.h engine.h
+SRCS+= eng_err.c eng_lib.c eng_list.c eng_init.c eng_ctrl.c \
+ eng_table.c eng_pkey.c eng_fat.c eng_all.c \
+ tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
+ tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
+ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
+ eng_rsax.c eng_rdrand.c
+INCS+= engine.h
# err
SRCS+= err.c err_all.c err_prn.c
INCS+= err.h
# evp
-SRCS+= bio_b64.c bio_enc.c bio_md.c bio_ok.c c_all.c c_allc.c c_alld.c \
- digest.c e_aes.c e_bf.c e_camellia.c e_cast.c e_des.c e_des3.c \
- e_idea.c e_null.c e_rc2.c e_rc4.c e_rc5.c e_seed.c e_xcbc_d.c \
- encode.c \
- evp_acnf.c evp_enc.c evp_err.c evp_key.c evp_lib.c evp_pbe.c \
- evp_pkey.c m_dss.c m_dss1.c m_ecdsa.c m_md2.c m_md4.c m_md5.c \
- m_mdc2.c m_null.c m_ripemd.c m_sha.c m_sha1.c \
- m_sigver.c m_wp.c names.c openbsd_hw.c \
- p5_crpt.c p5_crpt2.c p_dec.c p_enc.c p_lib.c p_open.c p_seal.c \
- p_sign.c p_verify.c pmeth_fn.c pmeth_gn.c pmeth_lib.c
+SRCS+= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c \
+ e_des.c e_bf.c e_idea.c e_des3.c e_camellia.c\
+ e_rc4.c e_aes.c names.c e_seed.c \
+ e_xcbc_d.c e_rc2.c e_cast.c e_rc5.c \
+ m_null.c m_md2.c m_md4.c m_md5.c m_sha.c m_sha1.c m_wp.c \
+ m_dss.c m_dss1.c m_mdc2.c m_ripemd.c m_ecdsa.c\
+ p_open.c p_seal.c p_sign.c p_verify.c p_lib.c p_enc.c p_dec.c \
+ bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \
+ c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
+ evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \
+ e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \
+ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c
INCS+= evp.h
# hmac
-SRCS+= hm_ameth.c hm_pmeth.c hmac.c
+SRCS+= hmac.c hm_ameth.c hm_pmeth.c
INCS+= hmac.h
# idea
.if defined(WANT_IDEA)
-SRCS+= i_cbc.c i_cfb64.c i_ecb.c i_ofb64.c i_skey.c
+SRCS+= i_cbc.c i_cfb64.c i_ofb64.c i_ecb.c i_skey.c
INCS+= idea.h
_ideapath= ${LCRYPTO_SRC}/crypto/idea
.endif
-# krb5
-#SRCS+= krb5_asn.c
-#INCS+= krb5_asn.h
-
# lhash
-SRCS+= lh_stats.c lhash.c
+SRCS+= lhash.c lh_stats.c
INCS+= lhash.h
# md2
INCS+= md5.h
# mdc2
-SRCS+= mdc2_one.c mdc2dgst.c
+SRCS+= mdc2dgst.c mdc2_one.c
INCS+= mdc2.h
# modes
-SRCS+= cbc128.c cfb128.c ctr128.c cts128.c ofb128.c
+SRCS+= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
+ ccm128.c xts128.c
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= ghash-x86.s
+.elif ${MACHINE_ARCH} == "x86_64"
+SRCS+= ghash-x86_64.s
+.endif
INCS+= modes.h
# objects
-SRCS+= o_names.c obj_dat.c obj_err.c obj_lib.c obj_xref.c
+SRCS+= o_names.c obj_dat.c obj_lib.c obj_err.c obj_xref.c
INCS+= objects.h obj_mac.h
# ocsp
-SRCS+= ocsp_asn.c ocsp_cl.c ocsp_err.c ocsp_ext.c ocsp_ht.c \
- ocsp_lib.c ocsp_prn.c ocsp_srv.c ocsp_vfy.c
+SRCS+= ocsp_asn.c ocsp_ext.c ocsp_ht.c ocsp_lib.c ocsp_cl.c \
+ ocsp_srv.c ocsp_prn.c ocsp_vfy.c ocsp_err.c
INCS+= ocsp.h
# pem
-SRCS+= pem_all.c pem_err.c pem_info.c pem_lib.c pem_oth.c pem_pk8.c \
- pem_pkey.c pem_seal.c pem_sign.c pem_x509.c pem_xaux.c pvkfmt.c
+SRCS+= pem_sign.c pem_seal.c pem_info.c pem_lib.c pem_all.c pem_err.c \
+ pem_x509.c pem_xaux.c pem_oth.c pem_pk8.c pem_pkey.c pvkfmt.c
INCS+= pem.h pem2.h
# pkcs12
-SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c \
- p12_decr.c p12_init.c p12_key.c p12_kiss.c p12_mutl.c \
- p12_npas.c p12_p8d.c p12_p8e.c p12_utl.c pk12err.c
+SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c p12_decr.c \
+ p12_init.c p12_key.c p12_kiss.c p12_mutl.c \
+ p12_utl.c p12_npas.c pk12err.c p12_p8d.c p12_p8e.c
INCS+= pkcs12.h
# pkcs7
-SRCS+= bio_pk7.c example.c pk7_asn1.c pk7_attr.c pk7_dgst.c pk7_doit.c \
- pk7_lib.c pk7_mime.c pk7_smime.c pkcs7err.c
+SRCS+= pk7_asn1.c pk7_lib.c pkcs7err.c pk7_doit.c pk7_smime.c pk7_attr.c \
+ pk7_mime.c bio_pk7.c
INCS+= pkcs7.h
# pqueue
INCS+= pqueue.h
# rand
-SRCS+= md_rand.c rand_egd.c \
- rand_err.c rand_lib.c rand_nw.c rand_unix.c \
- randfile.c
+SRCS+= md_rand.c randfile.c rand_lib.c rand_err.c rand_egd.c \
+ rand_unix.c
INCS+= rand.h
# rc2
-SRCS+= rc2_cbc.c rc2_ecb.c rc2_skey.c rc2cfb64.c rc2ofb64.c
+SRCS+= rc2_ecb.c rc2_skey.c rc2_cbc.c rc2cfb64.c rc2ofb64.c
INCS+= rc2.h
# rc4
+SRCS+= rc4_utl.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= rc4-586.s
.elif ${MACHINE_ARCH} == "x86_64"
-SRCS+= rc4-x86_64.s
+SRCS+= rc4-x86_64.s rc4-md5-x86_64.s
.endif
INCS+= rc4.h
# rc5
# .. is patented, so don't compile by default
-#SRCS+= rc5_ecb.c rc5_enc.c rc5_skey.c rc5cfb64.c rc5ofb64.c
+#SRCS+= rc5_skey.c rc5_ecb.c rc5_enc.c rc5cfb64.c rc5ofb64.c
#INCS+= rc5.h
# ripemd
INCS+= ripemd.h
# rsa
-SRCS+= rsa_ameth.c rsa_asn1.c rsa_chk.c rsa_depr.c rsa_eay.c \
- rsa_err.c rsa_gen.c \
- rsa_lib.c rsa_none.c rsa_null.c rsa_oaep.c \
- rsa_pk1.c rsa_pmeth.c rsa_prn.c rsa_pss.c \
- rsa_saos.c rsa_sign.c rsa_ssl.c rsa_x931.c
+SRCS+= rsa_eay.c rsa_gen.c rsa_lib.c rsa_sign.c rsa_saos.c rsa_err.c \
+ rsa_pk1.c rsa_ssl.c rsa_none.c rsa_oaep.c rsa_chk.c rsa_null.c \
+ rsa_pss.c rsa_x931.c rsa_asn1.c rsa_depr.c rsa_ameth.c rsa_prn.c \
+ rsa_pmeth.c rsa_crpt.c
INCS+= rsa.h
# seed
-SRCS+= seed.c seed_cbc.c seed_cfb.c seed_ecb.c seed_ofb.c
+SRCS+= seed.c seed_ecb.c seed_cbc.c seed_cfb.c seed_ofb.c
INCS+= seed.h
# sha
-SRCS+= sha1_one.c sha1dgst.c sha256.c sha512.c sha_dgst.c sha_one.c
+SRCS+= sha_dgst.c sha1dgst.c sha_one.c sha1_one.c sha256.c sha512.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= sha1-586.s sha256-586.s sha512-586.s
.elif ${MACHINE_ARCH} == "x86_64"
.endif
INCS+= sha.h
+# srp
+SRCS+= srp_lib.c srp_vfy.c
+INCS+= srp.h
+
# stack
SRCS+= stack.c
INCS+= stack.h safestack.h
#INCS+= store.h
# ts
-SRCS+= ts_asn1.c ts_conf.c ts_err.c ts_lib.c \
- ts_req_print.c ts_req_utils.c ts_rsp_print.c ts_rsp_sign.c \
- ts_rsp_utils.c ts_rsp_verify.c ts_verify_ctx.c
+SRCS+= ts_err.c ts_req_utils.c ts_req_print.c ts_rsp_utils.c ts_rsp_print.c \
+ ts_rsp_sign.c ts_rsp_verify.c ts_verify_ctx.c ts_lib.c ts_conf.c \
+ ts_asn1.c
INCS+= ts.h
# txt_db
INCS+= txt_db.h
# ui
-SRCS+= ui_compat.c ui_err.c ui_lib.c ui_openssl.c ui_util.c
-INCS+= ui.h ui_compat.h ui_locl.h
+SRCS+= ui_err.c ui_lib.c ui_openssl.c ui_util.c ui_compat.c
+INCS+= ui.h ui_compat.h
# whrlpool
SRCS+= wp_dgst.c
INCS+= whrlpool.h
# x509
-SRCS+= by_dir.c by_file.c x509_att.c x509_cmp.c x509_d2.c \
- x509_def.c x509_err.c x509_ext.c x509_lu.c x509_obj.c \
- x509_r2x.c x509_req.c x509_set.c x509_trs.c x509_txt.c \
- x509_v3.c x509_vfy.c x509_vpm.c x509cset.c x509name.c x509rset.c \
- x509spki.c x509type.c x_all.c
+SRCS+= x509_def.c x509_d2.c x509_r2x.c x509_cmp.c \
+ x509_obj.c x509_req.c x509spki.c x509_vfy.c \
+ x509_set.c x509cset.c x509rset.c x509_err.c \
+ x509name.c x509_v3.c x509_ext.c x509_att.c \
+ x509type.c x509_lu.c x_all.c x509_txt.c \
+ x509_trs.c by_file.c by_dir.c x509_vpm.c
INCS+= x509.h x509_vfy.h
# x509v3
-SRCS+= pcy_cache.c pcy_data.c pcy_lib.c pcy_map.c pcy_node.c pcy_tree.c \
- v3_addr.c v3_akey.c v3_akeya.c v3_asid.c v3_alt.c v3_bcons.c v3_bitst.c \
- v3_conf.c v3_cpols.c v3_crld.c v3_enum.c v3_extku.c \
- v3_genn.c v3_ia5.c v3_info.c v3_int.c v3_lib.c v3_ncons.c v3_ocsp.c \
- v3_pci.c v3_pcia.c v3_pcons.c v3_pmaps.c v3_pku.c v3_prn.c v3_purp.c \
- v3_skey.c v3_sxnet.c v3_utl.c v3err.c
+SRCS+= v3_bcons.c v3_bitst.c v3_conf.c v3_extku.c v3_ia5.c v3_lib.c \
+ v3_prn.c v3_utl.c v3err.c v3_genn.c v3_alt.c v3_skey.c v3_akey.c v3_pku.c \
+ v3_int.c v3_enum.c v3_sxnet.c v3_cpols.c v3_crld.c v3_purp.c v3_info.c \
+ v3_ocsp.c v3_akeya.c v3_pmaps.c v3_pcons.c v3_ncons.c v3_pcia.c v3_pci.c \
+ pcy_cache.c pcy_node.c pcy_data.c pcy_map.c pcy_tree.c pcy_lib.c \
+ v3_asid.c v3_addr.c
INCS+= x509v3.h
-SRCS+= buildinf.h
-INCS+= opensslconf.h
-INCSDIR= ${INCLUDEDIR}/openssl
-
-CLEANFILES+= buildinf.h opensslconf.h
-
-buildinf.h:
- ( echo "#ifndef MK1MF_BUILD"; \
- echo " /* auto-generated by crypto/Makefile.ssl for crypto/cversion.c */"; \
- echo " #define CFLAGS \"$(CC)\""; \
- echo " #define PLATFORM \"`uname -s`-`uname -m`\""; \
- echo " #define DATE \"`LC_ALL=C date`\""; \
- echo "#endif" ) > ${.TARGET}
-
-opensslconf.h: opensslconf-${MACHINE_ARCH}.h
-.if defined(WANT_IDEA)
- sed '/^# define OPENSSL_NO_IDEA$$/d;/^# define NO_IDEA$$/d' ${.ALLSRC} > ${.TARGET}
-.else
- cp ${.ALLSRC} ${.TARGET}
-.endif
-
.include <bsd.lib.mk>
+# The crypto subdirs are listed in the order of the vendor's Makefile
+# to aid future imports.
.PATH: \
${.CURDIR}/asm \
${LCRYPTO_SRC}/crypto \
+ ${LCRYPTO_SRC}/crypto/objects \
+ ${LCRYPTO_SRC}/crypto/md4 \
+ ${LCRYPTO_SRC}/crypto/md5 \
+ ${LCRYPTO_SRC}/crypto/sha \
+ ${LCRYPTO_SRC}/crypto/mdc2 \
+ ${LCRYPTO_SRC}/crypto/hmac \
+ ${LCRYPTO_SRC}/crypto/ripemd \
+ ${LCRYPTO_SRC}/crypto/whrlpool \
+ ${LCRYPTO_SRC}/crypto/des \
${LCRYPTO_SRC}/crypto/aes \
- ${LCRYPTO_SRC}/crypto/asn1 \
+ ${LCRYPTO_SRC}/crypto/rc2 \
+ ${LCRYPTO_SRC}/crypto/rc4 \
${LCRYPTO_SRC}/crypto/bf \
- ${LCRYPTO_SRC}/crypto/bio \
+ ${LCRYPTO_SRC}/crypto/cast \
+ ${LCRYPTO_SRC}/crypto/camellia \
+ ${LCRYPTO_SRC}/crypto/seed \
+ ${LCRYPTO_SRC}/crypto/modes \
${LCRYPTO_SRC}/crypto/bn \
${LCRYPTO_SRC}/crypto/bn/asm \
- ${LCRYPTO_SRC}/crypto/buffer \
- ${LCRYPTO_SRC}/crypto/camellia \
- ${LCRYPTO_SRC}/crypto/cast \
- ${LCRYPTO_SRC}/crypto/cms \
- ${LCRYPTO_SRC}/crypto/comp \
- ${LCRYPTO_SRC}/crypto/conf \
- ${LCRYPTO_SRC}/crypto/des \
- ${LCRYPTO_SRC}/crypto/dh \
- ${LCRYPTO_SRC}/crypto/dsa \
- ${LCRYPTO_SRC}/crypto/dso \
${LCRYPTO_SRC}/crypto/ec \
- ${LCRYPTO_SRC}/crypto/ecdh \
+ ${LCRYPTO_SRC}/crypto/rsa \
+ ${LCRYPTO_SRC}/crypto/dsa \
${LCRYPTO_SRC}/crypto/ecdsa \
+ ${LCRYPTO_SRC}/crypto/dh \
+ ${LCRYPTO_SRC}/crypto/ecdh \
+ ${LCRYPTO_SRC}/crypto/dso \
${LCRYPTO_SRC}/crypto/engine \
+ ${LCRYPTO_SRC}/crypto/buffer \
+ ${LCRYPTO_SRC}/crypto/bio \
+ ${LCRYPTO_SRC}/crypto/stack \
+ ${LCRYPTO_SRC}/crypto/lhash \
+ ${LCRYPTO_SRC}/crypto/rand \
${LCRYPTO_SRC}/crypto/err \
${LCRYPTO_SRC}/crypto/evp \
- ${LCRYPTO_SRC}/crypto/hmac \
- ${_ideapath} \
- ${LCRYPTO_SRC}/crypto/lhash \
- ${LCRYPTO_SRC}/crypto/md4 \
- ${LCRYPTO_SRC}/crypto/md5 \
- ${LCRYPTO_SRC}/crypto/mdc2 \
- ${LCRYPTO_SRC}/crypto/modes \
- ${LCRYPTO_SRC}/crypto/objects \
- ${LCRYPTO_SRC}/crypto/ocsp \
+ ${LCRYPTO_SRC}/crypto/asn1 \
${LCRYPTO_SRC}/crypto/pem \
- ${LCRYPTO_SRC}/crypto/pkcs12 \
+ ${LCRYPTO_SRC}/crypto/x509 \
+ ${LCRYPTO_SRC}/crypto/x509v3 \
+ ${LCRYPTO_SRC}/crypto/conf \
+ ${LCRYPTO_SRC}/crypto/txt_db \
${LCRYPTO_SRC}/crypto/pkcs7 \
+ ${LCRYPTO_SRC}/crypto/pkcs12 \
+ ${LCRYPTO_SRC}/crypto/comp \
+ ${LCRYPTO_SRC}/crypto/ocsp \
+ ${LCRYPTO_SRC}/crypto/ui \
+ ${LCRYPTO_SRC}/crypto/cms \
${LCRYPTO_SRC}/crypto/pqueue \
- ${LCRYPTO_SRC}/crypto/rand \
- ${LCRYPTO_SRC}/crypto/rc2 \
- ${LCRYPTO_SRC}/crypto/rc4 \
- ${LCRYPTO_SRC}/crypto/ripemd \
- ${LCRYPTO_SRC}/crypto/rsa \
- ${LCRYPTO_SRC}/crypto/seed \
- ${LCRYPTO_SRC}/crypto/sha \
- ${LCRYPTO_SRC}/crypto/stack \
- ${LCRYPTO_SRC}/crypto/threads \
${LCRYPTO_SRC}/crypto/ts \
- ${LCRYPTO_SRC}/crypto/txt_db \
- ${LCRYPTO_SRC}/crypto/ui \
- ${LCRYPTO_SRC}/crypto/whrlpool \
- ${LCRYPTO_SRC}/crypto/x509 \
- ${LCRYPTO_SRC}/crypto/x509v3 \
+ ${LCRYPTO_SRC}/crypto/srp \
+ ${LCRYPTO_SRC}/crypto/cmac \
+ ${_ideapath} \
${LCRYPTO_SRC} \
${.CURDIR}/man
# $FreeBSD: src/secure/lib/libcrypto/Makefile.inc,v 1.7.2.11 2003/02/20 15:07:32 nectar Exp $
# $DragonFly: src/secure/lib/libcrypto/Makefile.inc,v 1.18 2008/09/27 21:04:45 pavalos Exp $
-OSSLVERSION= 1.0.0g
-OSSLDATE= 2012-01-18
+OSSLVERSION= 1.0.1
+OSSLDATE= 2012-03-14
LCRYPTO_SRC= ${.CURDIR}/../../../crypto/openssl
LCRYPTO_DOC= ${LCRYPTO_SRC}/doc
CFLAGS+= -DDSO_DLFCN -DHAVE_DLFCN_H -DL_ENDIAN -DTERMIOS
CFLAGS+= -DOPENSSL_THREADS
-CFLAGS+= -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT -DSHA1_ASM \
- -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DWHIRLPOOL_ASM
+CFLAGS+= -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT \
+ -DOPENSSL_BN_ASM_GF2m -DSHA1_ASM \
+ -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DVPAES_ASM \
+ -DWHIRLPOOL_ASM -DGHASH_ASM
.if ${MACHINE_ARCH} == "i386"
-CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS -DRMD160_ASM
+CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS -DRMD160_ASM
+.elif ${MACHINE_ARCH} == "x86_64"
+CFLAGS+= -DOPENSSL_BN_ASM_MONT5 -DBSAES_ASM
.endif
CFLAGS+= -I${LCRYPTO_SRC} -I${LCRYPTO_SRC}/crypto \
-I${LCRYPTO_SRC}/crypto/asn1 -I${LCRYPTO_SRC}/crypto/evp \
+ -I${LCRYPTO_SRC}/crypto/modes \
-I${LCRYPTO_SRC}/crypto/engine -I${.OBJDIR}
MANDIR= ${SHAREDIR}/openssl/man/man
${OPENSSL_SRC}/crypto/camellia/asm \
${OPENSSL_SRC}/crypto/des/asm \
${OPENSSL_SRC}/crypto/md5/asm \
+ ${OPENSSL_SRC}/crypto/modes/asm \
${OPENSSL_SRC}/crypto/perlasm \
${OPENSSL_SRC}/crypto/rc4/asm \
${OPENSSL_SRC}/crypto/ripemd/asm \
# cpuid
SRCS= x86cpuid.pl
-# aes
-SRCS+= aes-586.pl
-
-# bf
-SRCS+= bf-586.pl
-
# bn
-SRCS+= bn-586.pl co-586.pl x86-mont.pl
-
-# camellia
-SRCS+= cmll-x86.pl
+SRCS+= bn-586.pl co-586.pl x86-mont.pl x86-gf2m.pl
# des
SRCS+= des-586.pl crypt586.pl
-# md5
-SRCS+= md5-586.pl
+# aes
+SRCS+= aes-586.pl vpaes-x86.pl aesni-x86.pl
+
+# bf
+SRCS+= bf-586.pl
# rc4
SRCS+= rc4-586.pl
-# ripemd
-SRCS+= rmd-586.pl
+# md5
+SRCS+= md5-586.pl
# sha
SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl
+# ripemd
+SRCS+= rmd-586.pl
+
# whrlpool
SRCS+= wp-mmx.pl
+# camellia
+SRCS+= cmll-x86.pl
+
+# modes
+SRCS+= ghash-x86.pl
+
PERLFLAGS= ${CFLAGS}
.elif ${MACHINE_ARCH} == "x86_64"
# cpuid
SRCS= x86_64cpuid.pl
-# aes
-SRCS+= aes-x86_64.pl
-
# bn
-SRCS+= x86_64-mont.pl
+SRCS+= x86_64-mont.pl x86_64-mont5.pl x86_64-gf2m.pl modexp512-x86_64.pl
-# camellia
-SRCS+= cmll-x86_64.pl
+# aes
+SRCS+= aes-x86_64.pl vpaes-x86_64.pl bsaes-x86_64.pl \
+ aesni-x86_64.pl aesni-sha1-x86_64.pl
+
+# rc4
+SRCS+= rc4-x86_64.pl rc4-md5-x86_64.pl
# md5
SRCS+= md5-x86_64.pl
-# rc4
-SRCS+= rc4-x86_64.pl
-
# sha
SRCS+= sha1-x86_64.pl sha256-x86_64.s sha512-x86_64.pl
# whrlpool
SRCS+= wp-x86_64.pl
+# camellia
+SRCS+= cmll-x86_64.pl
+
+# modes
+SRCS+= ghash-x86_64.pl
+
PERLFLAGS=
.endif
popl %ebp
ret
.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
-.globl AES_set_encrypt_key
-.type AES_set_encrypt_key,@function
+.globl private_AES_set_encrypt_key
+.type private_AES_set_encrypt_key,@function
.align 16
-AES_set_encrypt_key:
-.L_AES_set_encrypt_key_begin:
+private_AES_set_encrypt_key:
+.L_private_AES_set_encrypt_key_begin:
call _x86_AES_set_encrypt_key
ret
-.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
-.globl AES_set_decrypt_key
-.type AES_set_decrypt_key,@function
+.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin
+.globl private_AES_set_decrypt_key
+.type private_AES_set_decrypt_key,@function
.align 16
-AES_set_decrypt_key:
-.L_AES_set_decrypt_key_begin:
+private_AES_set_decrypt_key:
+.L_private_AES_set_decrypt_key_begin:
call _x86_AES_set_encrypt_key
cmpl $0,%eax
je .L054proceed
popl %ebx
popl %ebp
ret
-.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
+.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,4,4
+.comm OPENSSL_ia32cap_P,8,4
.globl AES_encrypt
.type AES_encrypt,@function
.align 16
+.globl asm_AES_encrypt
+.hidden asm_AES_encrypt
+asm_AES_encrypt:
AES_encrypt:
pushq %rbx
pushq %rbp
.globl AES_decrypt
.type AES_decrypt,@function
.align 16
+.globl asm_AES_decrypt
+.hidden asm_AES_decrypt
+asm_AES_decrypt:
AES_decrypt:
pushq %rbx
pushq %rbp
.Ldec_epilogue:
.byte 0xf3,0xc3
.size AES_decrypt,.-AES_decrypt
-.globl AES_set_encrypt_key
-.type AES_set_encrypt_key,@function
+.globl private_AES_set_encrypt_key
+.type private_AES_set_encrypt_key,@function
.align 16
-AES_set_encrypt_key:
+private_AES_set_encrypt_key:
pushq %rbx
pushq %rbp
pushq %r12
addq $56,%rsp
.Lenc_key_epilogue:
.byte 0xf3,0xc3
-.size AES_set_encrypt_key,.-AES_set_encrypt_key
+.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
.type _x86_64_AES_set_encrypt_key,@function
.align 16
.Lexit:
.byte 0xf3,0xc3
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
-.globl AES_set_decrypt_key
-.type AES_set_decrypt_key,@function
+.globl private_AES_set_decrypt_key
+.type private_AES_set_decrypt_key,@function
.align 16
-AES_set_decrypt_key:
+private_AES_set_decrypt_key:
pushq %rbx
pushq %rbp
pushq %r12
addq $56,%rsp
.Ldec_key_epilogue:
.byte 0xf3,0xc3
-.size AES_set_decrypt_key,.-AES_set_decrypt_key
+.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,@function
.align 16
+.globl asm_AES_cbc_encrypt
+.hidden asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
AES_cbc_encrypt:
cmpq $0,%rdx
je .Lcbc_epilogue
--- /dev/null
+.text
+
+
+.globl aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc,@function
+.align 16
+aesni_cbc_sha1_enc:
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+aesni_cbc_sha1_enc_ssse3:
+ movq 8(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+
+
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast1:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast2:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast3:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast4:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast5:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
--- /dev/null
+.file "../../../../crypto/openssl/crypto/aes/asm/aesni-x86.s"
+.text
+.globl aesni_encrypt
+.type aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+.L_aesni_encrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L000enc1_loop_1:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L000enc1_loop_1
+.byte 102,15,56,221,209
+ movups %xmm2,(%eax)
+ ret
+.size aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl aesni_decrypt
+.type aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+.L_aesni_decrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L001dec1_loop_2:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L001dec1_loop_2
+.byte 102,15,56,223,209
+ movups %xmm2,(%eax)
+ ret
+.size aesni_decrypt,.-.L_aesni_decrypt_begin
+.type _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+ movups (%edx),%xmm0
+ shrl $1,%ecx
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups (%edx),%xmm0
+.L002enc3_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %ecx
+.byte 102,15,56,220,225
+ movups 16(%edx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leal 32(%edx),%edx
+.byte 102,15,56,220,224
+ movups (%edx),%xmm0
+ jnz .L002enc3_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+ ret
+.size _aesni_encrypt3,.-_aesni_encrypt3
+.type _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+ movups (%edx),%xmm0
+ shrl $1,%ecx
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups (%edx),%xmm0
+.L003dec3_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %ecx
+.byte 102,15,56,222,225
+ movups 16(%edx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leal 32(%edx),%edx
+.byte 102,15,56,222,224
+ movups (%edx),%xmm0
+ jnz .L003dec3_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+ ret
+.size _aesni_decrypt3,.-_aesni_decrypt3
+.type _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shrl $1,%ecx
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups (%edx),%xmm0
+.L004enc4_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %ecx
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 16(%edx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leal 32(%edx),%edx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups (%edx),%xmm0
+ jnz .L004enc4_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+ ret
+.size _aesni_encrypt4,.-_aesni_encrypt4
+.type _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shrl $1,%ecx
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups (%edx),%xmm0
+.L005dec4_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %ecx
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 16(%edx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leal 32(%edx),%edx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups (%edx),%xmm0
+ jnz .L005dec4_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+ ret
+.size _aesni_decrypt4,.-_aesni_decrypt4
+.type _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+ movups (%edx),%xmm0
+ shrl $1,%ecx
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+ decl %ecx
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+.byte 102,15,56,220,241
+ movups (%edx),%xmm0
+.byte 102,15,56,220,249
+ jmp .L_aesni_encrypt6_enter
+.align 16
+.L006enc6_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %ecx
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.align 16
+.L_aesni_encrypt6_enter:
+ movups 16(%edx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leal 32(%edx),%edx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%edx),%xmm0
+ jnz .L006enc6_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+ ret
+.size _aesni_encrypt6,.-_aesni_encrypt6
+.type _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+ movups (%edx),%xmm0
+ shrl $1,%ecx
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+ decl %ecx
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+.byte 102,15,56,222,241
+ movups (%edx),%xmm0
+.byte 102,15,56,222,249
+ jmp .L_aesni_decrypt6_enter
+.align 16
+.L007dec6_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %ecx
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.align 16
+.L_aesni_decrypt6_enter:
+ movups 16(%edx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leal 32(%edx),%edx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups (%edx),%xmm0
+ jnz .L007dec6_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+ ret
+.size _aesni_decrypt6,.-_aesni_decrypt6
+.globl aesni_ecb_encrypt
+.type aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ andl $-16,%eax
+ jz .L008ecb_ret
+ movl 240(%edx),%ecx
+ testl %ebx,%ebx
+ jz .L009ecb_decrypt
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb .L010ecb_enc_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp .L011ecb_enc_loop6_enter
+.align 16
+.L012ecb_enc_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+.L011ecb_enc_loop6_enter:
+ call _aesni_encrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc .L012ecb_enc_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz .L008ecb_ret
+.L010ecb_enc_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb .L013ecb_enc_one
+ movups 16(%esi),%xmm3
+ je .L014ecb_enc_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb .L015ecb_enc_three
+ movups 48(%esi),%xmm5
+ je .L016ecb_enc_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L013ecb_enc_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L017enc1_loop_3:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L017enc1_loop_3
+.byte 102,15,56,221,209
+ movups %xmm2,(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L014ecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L015ecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L016ecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L009ecb_decrypt:
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb .L018ecb_dec_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp .L019ecb_dec_loop6_enter
+.align 16
+.L020ecb_dec_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+.L019ecb_dec_loop6_enter:
+ call _aesni_decrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc .L020ecb_dec_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz .L008ecb_ret
+.L018ecb_dec_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb .L021ecb_dec_one
+ movups 16(%esi),%xmm3
+ je .L022ecb_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb .L023ecb_dec_three
+ movups 48(%esi),%xmm5
+ je .L024ecb_dec_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L021ecb_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L025dec1_loop_4:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L025dec1_loop_4
+.byte 102,15,56,223,209
+ movups %xmm2,(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L022ecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L023ecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L008ecb_ret
+.align 16
+.L024ecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+.L008ecb_ret:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl aesni_ccm64_encrypt_blocks
+.type aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+.L_aesni_ccm64_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ shrl $1,%ecx
+ leal (%edx),%ebp
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ movl %ecx,%ebx
+.byte 102,15,56,0,253
+.L026ccm64_enc_outer:
+ movups (%ebp),%xmm0
+ movl %ebx,%ecx
+ movups (%esi),%xmm6
+ xorps %xmm0,%xmm2
+ movups 16(%ebp),%xmm1
+ xorps %xmm6,%xmm0
+ leal 32(%ebp),%edx
+ xorps %xmm0,%xmm3
+ movups (%edx),%xmm0
+.L027ccm64_enc2_loop:
+.byte 102,15,56,220,209
+ decl %ecx
+.byte 102,15,56,220,217
+ movups 16(%edx),%xmm1
+.byte 102,15,56,220,208
+ leal 32(%edx),%edx
+.byte 102,15,56,220,216
+ movups (%edx),%xmm0
+ jnz .L027ccm64_enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ paddq 16(%esp),%xmm7
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ decl %eax
+ leal 16(%esi),%esi
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movups %xmm6,(%edi)
+ leal 16(%edi),%edi
+.byte 102,15,56,0,213
+ jnz .L026ccm64_enc_outer
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
+.globl aesni_ccm64_decrypt_blocks
+.type aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+.L_aesni_ccm64_decrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ movl %edx,%ebp
+ movl %ecx,%ebx
+.byte 102,15,56,0,253
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L028enc1_loop_5:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L028enc1_loop_5
+.byte 102,15,56,221,209
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+ leal 16(%esi),%esi
+ jmp .L029ccm64_dec_outer
+.align 16
+.L029ccm64_dec_outer:
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movl %ebx,%ecx
+ movups %xmm6,(%edi)
+ leal 16(%edi),%edi
+.byte 102,15,56,0,213
+ subl $1,%eax
+ jz .L030ccm64_dec_break
+ movups (%ebp),%xmm0
+ shrl $1,%ecx
+ movups 16(%ebp),%xmm1
+ xorps %xmm0,%xmm6
+ leal 32(%ebp),%edx
+ xorps %xmm0,%xmm2
+ xorps %xmm6,%xmm3
+ movups (%edx),%xmm0
+.L031ccm64_dec2_loop:
+.byte 102,15,56,220,209
+ decl %ecx
+.byte 102,15,56,220,217
+ movups 16(%edx),%xmm1
+.byte 102,15,56,220,208
+ leal 32(%edx),%edx
+.byte 102,15,56,220,216
+ movups (%edx),%xmm0
+ jnz .L031ccm64_dec2_loop
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ leal 16(%esi),%esi
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ jmp .L029ccm64_dec_outer
+.align 16
+.L030ccm64_dec_break:
+ movl %ebp,%edx
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm6
+ leal 32(%edx),%edx
+ xorps %xmm6,%xmm3
+.L032enc1_loop_6:
+.byte 102,15,56,220,217
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L032enc1_loop_6
+.byte 102,15,56,221,217
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
+.globl aesni_ctr32_encrypt_blocks
+.type aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+.L_aesni_ctr32_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl %esp,%ebp
+ subl $88,%esp
+ andl $-16,%esp
+ movl %ebp,80(%esp)
+ cmpl $1,%eax
+ je .L033ctr32_one_shortcut
+ movdqu (%ebx),%xmm7
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $6,%ecx
+ xorl %ebp,%ebp
+ movl %ecx,16(%esp)
+ movl %ecx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %ebp,28(%esp)
+.byte 102,15,58,22,251,3
+.byte 102,15,58,34,253,3
+ movl 240(%edx),%ecx
+ bswap %ebx
+ pxor %xmm1,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa (%esp),%xmm2
+.byte 102,15,58,34,203,0
+ leal 3(%ebx),%ebp
+.byte 102,15,58,34,197,0
+ incl %ebx
+.byte 102,15,58,34,203,1
+ incl %ebp
+.byte 102,15,58,34,197,1
+ incl %ebx
+.byte 102,15,58,34,203,2
+ incl %ebp
+.byte 102,15,58,34,197,2
+ movdqa %xmm1,48(%esp)
+.byte 102,15,56,0,202
+ movdqa %xmm0,64(%esp)
+.byte 102,15,56,0,194
+ pshufd $192,%xmm1,%xmm2
+ pshufd $128,%xmm1,%xmm3
+ cmpl $6,%eax
+ jb .L034ctr32_tail
+ movdqa %xmm7,32(%esp)
+ shrl $1,%ecx
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ subl $6,%eax
+ jmp .L035ctr32_loop6
+.align 16
+.L035ctr32_loop6:
+ pshufd $64,%xmm1,%xmm4
+ movdqa 32(%esp),%xmm1
+ pshufd $192,%xmm0,%xmm5
+ por %xmm1,%xmm2
+ pshufd $128,%xmm0,%xmm6
+ por %xmm1,%xmm3
+ pshufd $64,%xmm0,%xmm7
+ por %xmm1,%xmm4
+ por %xmm1,%xmm5
+ por %xmm1,%xmm6
+ por %xmm1,%xmm7
+ movups (%ebp),%xmm0
+ movups 16(%ebp),%xmm1
+ leal 32(%ebp),%edx
+ decl %ecx
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+.byte 102,15,56,220,241
+ movups (%edx),%xmm0
+.byte 102,15,56,220,249
+ call .L_aesni_encrypt6_enter
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups %xmm2,(%edi)
+ movdqa 16(%esp),%xmm0
+ xorps %xmm1,%xmm4
+ movdqa 48(%esp),%xmm1
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ paddd %xmm0,%xmm1
+ paddd 64(%esp),%xmm0
+ movdqa (%esp),%xmm2
+ movups 48(%esi),%xmm3
+ movups 64(%esi),%xmm4
+ xorps %xmm3,%xmm5
+ movups 80(%esi),%xmm3
+ leal 96(%esi),%esi
+ movdqa %xmm1,48(%esp)
+.byte 102,15,56,0,202
+ xorps %xmm4,%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm3,%xmm7
+ movdqa %xmm0,64(%esp)
+.byte 102,15,56,0,194
+ movups %xmm6,64(%edi)
+ pshufd $192,%xmm1,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movl %ebx,%ecx
+ pshufd $128,%xmm1,%xmm3
+ subl $6,%eax
+ jnc .L035ctr32_loop6
+ addl $6,%eax
+ jz .L036ctr32_ret
+ movl %ebp,%edx
+ leal 1(,%ecx,2),%ecx
+ movdqa 32(%esp),%xmm7
+.L034ctr32_tail:
+ por %xmm7,%xmm2
+ cmpl $2,%eax
+ jb .L037ctr32_one
+ pshufd $64,%xmm1,%xmm4
+ por %xmm7,%xmm3
+ je .L038ctr32_two
+ pshufd $192,%xmm0,%xmm5
+ por %xmm7,%xmm4
+ cmpl $4,%eax
+ jb .L039ctr32_three
+ pshufd $128,%xmm0,%xmm6
+ por %xmm7,%xmm5
+ je .L040ctr32_four
+ por %xmm7,%xmm6
+ call _aesni_encrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm4
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm5
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L036ctr32_ret
+.align 16
+.L033ctr32_one_shortcut:
+ movups (%ebx),%xmm2
+ movl 240(%edx),%ecx
+.L037ctr32_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L041enc1_loop_7:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L041enc1_loop_7
+.byte 102,15,56,221,209
+ movups (%esi),%xmm6
+ xorps %xmm2,%xmm6
+ movups %xmm6,(%edi)
+ jmp .L036ctr32_ret
+.align 16
+.L038ctr32_two:
+ call _aesni_encrypt3
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L036ctr32_ret
+.align 16
+.L039ctr32_three:
+ call _aesni_encrypt3
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ movups 32(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L036ctr32_ret
+.align 16
+.L040ctr32_four:
+ call _aesni_encrypt4
+ movups (%esi),%xmm6
+ movups 16(%esi),%xmm7
+ movups 32(%esi),%xmm1
+ xorps %xmm6,%xmm2
+ movups 48(%esi),%xmm0
+ xorps %xmm7,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+.L036ctr32_ret:
+ movl 80(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
+.globl aesni_xts_encrypt
+.type aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L042enc1_loop_8:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L042enc1_loop_8
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ movl 240(%edx),%ecx
+ andl $-16,%esp
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ subl $96,%eax
+ jc .L043xts_enc_short
+ shrl $1,%ecx
+ movl %ecx,%ebx
+ jmp .L044xts_enc_loop6
+.align 16
+.L044xts_enc_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ leal 32(%ebp),%edx
+ pxor 16(%esp),%xmm3
+.byte 102,15,56,220,209
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,220,217
+ pxor 48(%esp),%xmm5
+ decl %ecx
+.byte 102,15,56,220,225
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+.byte 102,15,56,220,241
+ movups (%edx),%xmm0
+.byte 102,15,56,220,249
+ call .L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ movl %ebx,%ecx
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc .L044xts_enc_loop6
+ leal 1(,%ecx,2),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+.L043xts_enc_short:
+ addl $96,%eax
+ jz .L045xts_enc_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb .L046xts_enc_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je .L047xts_enc_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb .L048xts_enc_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je .L049xts_enc_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call _aesni_encrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp .L050xts_enc_done
+.align 16
+.L046xts_enc_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L051enc1_loop_9:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L051enc1_loop_9
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L047xts_enc_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L048xts_enc_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L049xts_enc_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call _aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L045xts_enc_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz .L052xts_enc_ret
+ movdqa %xmm1,%xmm5
+ movl %eax,112(%esp)
+ jmp .L053xts_enc_steal
+.align 16
+.L050xts_enc_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L052xts_enc_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm5
+ paddq %xmm1,%xmm1
+ pand 96(%esp),%xmm5
+ pxor %xmm1,%xmm5
+.L053xts_enc_steal:
+ movzbl (%esi),%ecx
+ movzbl -16(%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,-16(%edi)
+ movb %dl,(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L053xts_enc_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups -16(%edi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L054enc1_loop_10:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L054enc1_loop_10
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,-16(%edi)
+.L052xts_enc_ret:
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L055enc1_loop_11:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L055enc1_loop_11
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+ testl $15,%eax
+ setnz %bl
+ shll $4,%ebx
+ subl %ebx,%eax
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ subl $96,%eax
+ jc .L056xts_dec_short
+ shrl $1,%ecx
+ movl %ecx,%ebx
+ jmp .L057xts_dec_loop6
+.align 16
+.L057xts_dec_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ leal 32(%ebp),%edx
+ pxor 16(%esp),%xmm3
+.byte 102,15,56,222,209
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,222,217
+ pxor 48(%esp),%xmm5
+ decl %ecx
+.byte 102,15,56,222,225
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+.byte 102,15,56,222,241
+ movups (%edx),%xmm0
+.byte 102,15,56,222,249
+ call .L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ movl %ebx,%ecx
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc .L057xts_dec_loop6
+ leal 1(,%ecx,2),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+.L056xts_dec_short:
+ addl $96,%eax
+ jz .L058xts_dec_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb .L059xts_dec_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je .L060xts_dec_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb .L061xts_dec_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je .L062xts_dec_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call _aesni_decrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp .L063xts_dec_done
+.align 16
+.L059xts_dec_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L064dec1_loop_12:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L064dec1_loop_12
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L060xts_dec_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call _aesni_decrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L061xts_dec_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L062xts_dec_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call _aesni_decrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L058xts_dec_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz .L065xts_dec_ret
+ movl %eax,112(%esp)
+ jmp .L066xts_dec_only_one_more
+.align 16
+.L063xts_dec_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L065xts_dec_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+.L066xts_dec_only_one_more:
+ pshufd $19,%xmm0,%xmm5
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm5
+ pxor %xmm1,%xmm5
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%esi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L067dec1_loop_13:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L067dec1_loop_13
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+.L068xts_dec_steal:
+ movzbl 16(%esi),%ecx
+ movzbl (%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,(%edi)
+ movb %dl,16(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L068xts_dec_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%edi),%xmm2
+ xorps %xmm6,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L069dec1_loop_14:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L069dec1_loop_14
+.byte 102,15,56,223,209
+ xorps %xmm6,%xmm2
+ movups %xmm2,(%edi)
+.L065xts_dec_ret:
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
+.globl aesni_cbc_encrypt
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl %esp,%ebx
+ movl 24(%esp),%edi
+ subl $24,%ebx
+ movl 28(%esp),%eax
+ andl $-16,%ebx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebp
+ testl %eax,%eax
+ jz .L070cbc_abort
+ cmpl $0,40(%esp)
+ xchgl %esp,%ebx
+ movups (%ebp),%xmm7
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ebx,16(%esp)
+ movl %ecx,%ebx
+ je .L071cbc_decrypt
+ movaps %xmm7,%xmm2
+ cmpl $16,%eax
+ jb .L072cbc_enc_tail
+ subl $16,%eax
+ jmp .L073cbc_enc_loop
+.align 16
+.L073cbc_enc_loop:
+ movups (%esi),%xmm7
+ leal 16(%esi),%esi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm7
+ leal 32(%edx),%edx
+ xorps %xmm7,%xmm2
+.L074enc1_loop_15:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L074enc1_loop_15
+.byte 102,15,56,221,209
+ movl %ebx,%ecx
+ movl %ebp,%edx
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ subl $16,%eax
+ jnc .L073cbc_enc_loop
+ addl $16,%eax
+ jnz .L072cbc_enc_tail
+ movaps %xmm2,%xmm7
+ jmp .L075cbc_ret
+.L072cbc_enc_tail:
+ movl %eax,%ecx
+.long 2767451785
+ movl $16,%ecx
+ subl %eax,%ecx
+ xorl %eax,%eax
+.long 2868115081
+ leal -16(%edi),%edi
+ movl %ebx,%ecx
+ movl %edi,%esi
+ movl %ebp,%edx
+ jmp .L073cbc_enc_loop
+.align 16
+.L071cbc_decrypt:
+ cmpl $80,%eax
+ jbe .L076cbc_dec_tail
+ movaps %xmm7,(%esp)
+ subl $80,%eax
+ jmp .L077cbc_dec_loop6_enter
+.align 16
+.L078cbc_dec_loop6:
+ movaps %xmm0,(%esp)
+ movups %xmm7,(%edi)
+ leal 16(%edi),%edi
+.L077cbc_dec_loop6_enter:
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ call _aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%esi),%xmm0
+ xorps %xmm1,%xmm7
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 96(%esi),%esi
+ movups %xmm4,32(%edi)
+ movl %ebx,%ecx
+ movups %xmm5,48(%edi)
+ movl %ebp,%edx
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ subl $96,%eax
+ ja .L078cbc_dec_loop6
+ movaps %xmm7,%xmm2
+ movaps %xmm0,%xmm7
+ addl $80,%eax
+ jle .L079cbc_dec_tail_collected
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+.L076cbc_dec_tail:
+ movups (%esi),%xmm2
+ movaps %xmm2,%xmm6
+ cmpl $16,%eax
+ jbe .L080cbc_dec_one
+ movups 16(%esi),%xmm3
+ movaps %xmm3,%xmm5
+ cmpl $32,%eax
+ jbe .L081cbc_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $48,%eax
+ jbe .L082cbc_dec_three
+ movups 48(%esi),%xmm5
+ cmpl $64,%eax
+ jbe .L083cbc_dec_four
+ movups 64(%esi),%xmm6
+ movaps %xmm7,(%esp)
+ movups (%esi),%xmm2
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm7
+ xorps %xmm0,%xmm6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movaps %xmm6,%xmm2
+ subl $80,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L080cbc_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L084dec1_loop_16:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L084dec1_loop_16
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm7
+ subl $16,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L081cbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movaps %xmm3,%xmm2
+ leal 16(%edi),%edi
+ movaps %xmm5,%xmm7
+ subl $32,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L082cbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm5,%xmm4
+ movups %xmm2,(%edi)
+ movaps %xmm4,%xmm2
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movups 32(%esi),%xmm7
+ subl $48,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_four:
+ call _aesni_decrypt4
+ movups 16(%esi),%xmm1
+ movups 32(%esi),%xmm0
+ xorps %xmm7,%xmm2
+ movups 48(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movaps %xmm5,%xmm2
+ subl $64,%eax
+.L079cbc_dec_tail_collected:
+ andl $15,%eax
+ jnz .L085cbc_dec_tail_partial
+ movups %xmm2,(%edi)
+ jmp .L075cbc_ret
+.align 16
+.L085cbc_dec_tail_partial:
+ movaps %xmm2,(%esp)
+ movl $16,%ecx
+ movl %esp,%esi
+ subl %eax,%ecx
+.long 2767451785
+.L075cbc_ret:
+ movl 16(%esp),%esp
+ movl 36(%esp),%ebp
+ movups %xmm7,(%ebp)
+.L070cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
+.type _aesni_set_encrypt_key,@function
+.align 16
+_aesni_set_encrypt_key:
+ testl %eax,%eax
+ jz .L086bad_pointer
+ testl %edx,%edx
+ jz .L086bad_pointer
+ movups (%eax),%xmm0
+ xorps %xmm4,%xmm4
+ leal 16(%edx),%edx
+ cmpl $256,%ecx
+ je .L08714rounds
+ cmpl $192,%ecx
+ je .L08812rounds
+ cmpl $128,%ecx
+ jne .L089bad_keybits
+.align 16
+.L09010rounds:
+ movl $9,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,200,1
+ call .L091key_128_cold
+.byte 102,15,58,223,200,2
+ call .L092key_128
+.byte 102,15,58,223,200,4
+ call .L092key_128
+.byte 102,15,58,223,200,8
+ call .L092key_128
+.byte 102,15,58,223,200,16
+ call .L092key_128
+.byte 102,15,58,223,200,32
+ call .L092key_128
+.byte 102,15,58,223,200,64
+ call .L092key_128
+.byte 102,15,58,223,200,128
+ call .L092key_128
+.byte 102,15,58,223,200,27
+ call .L092key_128
+.byte 102,15,58,223,200,54
+ call .L092key_128
+ movups %xmm0,(%edx)
+ movl %ecx,80(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L092key_128:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.L091key_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 16
+.L08812rounds:
+ movq 16(%eax),%xmm2
+ movl $11,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,202,1
+ call .L093key_192a_cold
+.byte 102,15,58,223,202,2
+ call .L094key_192b
+.byte 102,15,58,223,202,4
+ call .L095key_192a
+.byte 102,15,58,223,202,8
+ call .L094key_192b
+.byte 102,15,58,223,202,16
+ call .L095key_192a
+.byte 102,15,58,223,202,32
+ call .L094key_192b
+.byte 102,15,58,223,202,64
+ call .L095key_192a
+.byte 102,15,58,223,202,128
+ call .L094key_192b
+ movups %xmm0,(%edx)
+ movl %ecx,48(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L095key_192a:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.align 16
+.L093key_192a_cold:
+ movaps %xmm2,%xmm5
+.L096key_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ ret
+.align 16
+.L094key_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%edx)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%edx)
+ leal 32(%edx),%edx
+ jmp .L096key_192b_warm
+.align 16
+.L08714rounds:
+ movups 16(%eax),%xmm2
+ movl $13,%ecx
+ leal 16(%edx),%edx
+ movups %xmm0,-32(%edx)
+ movups %xmm2,-16(%edx)
+.byte 102,15,58,223,202,1
+ call .L097key_256a_cold
+.byte 102,15,58,223,200,1
+ call .L098key_256b
+.byte 102,15,58,223,202,2
+ call .L099key_256a
+.byte 102,15,58,223,200,2
+ call .L098key_256b
+.byte 102,15,58,223,202,4
+ call .L099key_256a
+.byte 102,15,58,223,200,4
+ call .L098key_256b
+.byte 102,15,58,223,202,8
+ call .L099key_256a
+.byte 102,15,58,223,200,8
+ call .L098key_256b
+.byte 102,15,58,223,202,16
+ call .L099key_256a
+.byte 102,15,58,223,200,16
+ call .L098key_256b
+.byte 102,15,58,223,202,32
+ call .L099key_256a
+.byte 102,15,58,223,200,32
+ call .L098key_256b
+.byte 102,15,58,223,202,64
+ call .L099key_256a
+ movups %xmm0,(%edx)
+ movl %ecx,16(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L099key_256a:
+ movups %xmm2,(%edx)
+ leal 16(%edx),%edx
+.L097key_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 16
+.L098key_256b:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ ret
+.align 4
+.L086bad_pointer:
+ movl $-1,%eax
+ ret
+.align 4
+.L089bad_keybits:
+ movl $-2,%eax
+ ret
+.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl aesni_set_encrypt_key
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call _aesni_set_encrypt_key
+ ret
+.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl aesni_set_decrypt_key
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call _aesni_set_encrypt_key
+ movl 12(%esp),%edx
+ shll $4,%ecx
+ testl %eax,%eax
+ jnz .L100dec_key_ret
+ leal 16(%edx,%ecx,1),%eax
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+ movups %xmm0,(%eax)
+ movups %xmm1,(%edx)
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+.L101dec_key_inverse:
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+.byte 102,15,56,219,192
+.byte 102,15,56,219,201
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+ movups %xmm0,16(%eax)
+ movups %xmm1,-16(%edx)
+ cmpl %edx,%eax
+ ja .L101dec_key_inverse
+ movups (%edx),%xmm0
+.byte 102,15,56,219,192
+ movups %xmm0,(%edx)
+ xorl %eax,%eax
+.L100dec_key_ret:
+ ret
+.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte 115,108,46,111,114,103,62,0
--- /dev/null
+.text
+.globl aesni_encrypt
+.type aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_enc1_1
+.byte 102,15,56,221,209
+ movups %xmm2,(%rsi)
+ .byte 0xf3,0xc3
+.size aesni_encrypt,.-aesni_encrypt
+
+.globl aesni_decrypt
+.type aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_dec1_2
+.byte 102,15,56,223,209
+ movups %xmm2,(%rsi)
+ .byte 0xf3,0xc3
+.size aesni_decrypt, .-aesni_decrypt
+.type _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+ .byte 0xf3,0xc3
+.size _aesni_encrypt3,.-_aesni_encrypt3
+.type _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+ .byte 0xf3,0xc3
+.size _aesni_decrypt3,.-_aesni_decrypt3
+.type _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+ .byte 0xf3,0xc3
+.size _aesni_encrypt4,.-_aesni_encrypt4
+.type _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+ .byte 0xf3,0xc3
+.size _aesni_decrypt4,.-_aesni_decrypt4
+.type _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,220,241
+ movups (%rcx),%xmm0
+.byte 102,15,56,220,249
+ jmp .Lenc_loop6_enter
+.align 16
+.Lenc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+ .byte 0xf3,0xc3
+.size _aesni_encrypt6,.-_aesni_encrypt6
+.type _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ movups (%rcx),%xmm0
+.byte 102,15,56,222,249
+ jmp .Ldec_loop6_enter
+.align 16
+.Ldec_loop6:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+ .byte 0xf3,0xc3
+.size _aesni_decrypt6,.-_aesni_decrypt6
+.type _aesni_encrypt8,@function
+.align 16
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,220,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,220,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.align 16
+.Lenc_loop8:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+.byte 102,68,15,56,220,192
+.byte 102,68,15,56,220,200
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+.byte 102,68,15,56,221,192
+.byte 102,68,15,56,221,200
+ .byte 0xf3,0xc3
+.size _aesni_encrypt8,.-_aesni_encrypt8
+.type _aesni_decrypt8,@function
+.align 16
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,222,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.align 16
+.Ldec_loop8:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+.byte 102,68,15,56,222,192
+.byte 102,68,15,56,222,200
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+.byte 102,68,15,56,223,192
+.byte 102,68,15,56,223,200
+ .byte 0xf3,0xc3
+.size _aesni_decrypt8,.-_aesni_decrypt8
+.globl aesni_ecb_encrypt
+.type aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.align 16
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+.byte 102,15,56,221,209
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.align 16
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+.byte 102,15,56,223,209
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+.Lecb_ret:
+ .byte 0xf3,0xc3
+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_ccm64_encrypt_blocks
+.type aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.align 16
+.Lccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_enc2_loop:
+.byte 102,15,56,220,209
+ decl %eax
+.byte 102,15,56,220,217
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,216
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ paddq %xmm6,%xmm9
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+
+ movups %xmm3,(%r9)
+ .byte 0xf3,0xc3
+.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks
+.type aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+.byte 102,15,56,221,209
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+.byte 102,15,56,220,209
+ decl %eax
+.byte 102,15,56,220,217
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,216
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ leaq 16(%rdi),%rdi
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ jmp .Lccm64_dec_outer
+
+.align 16
+.Lccm64_dec_break:
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+.Loop_enc1_6:
+.byte 102,15,56,220,217
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+.byte 102,15,56,221,217
+ movups %xmm3,(%r9)
+ .byte 0xf3,0xc3
+.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl aesni_ctr32_encrypt_blocks
+.type aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3
+.byte 102,68,15,58,34,240,3
+
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+.byte 102,69,15,58,34,226,1
+ incq %r11
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+.byte 102,69,15,58,34,226,2
+ incq %r11
+.byte 102,69,15,58,34,235,2
+ movdqa %xmm12,-40(%rsp)
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239
+
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.align 16
+.Lctr32_loop6:
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ movdqa -40(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ jmp .Lctr32_enc_loop6_enter
+.align 16
+.Lctr32_enc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+
+.byte 102,15,56,220,209
+ paddd %xmm13,%xmm12
+.byte 102,15,56,220,217
+ paddd -24(%rsp),%xmm13
+.byte 102,15,56,220,225
+ movdqa %xmm12,-40(%rsp)
+.byte 102,15,56,220,233
+ movdqa %xmm13,-24(%rsp)
+.byte 102,15,56,220,241
+.byte 102,69,15,56,0,231
+.byte 102,15,56,220,249
+.byte 102,69,15,56,0,239
+
+.byte 102,15,56,221,208
+ movups (%rdi),%xmm8
+.byte 102,15,56,221,216
+ movups 16(%rdi),%xmm9
+.byte 102,15,56,221,224
+ movups 32(%rdi),%xmm10
+.byte 102,15,56,221,232
+ movups 48(%rdi),%xmm11
+.byte 102,15,56,221,240
+ movups 64(%rdi),%xmm1
+.byte 102,15,56,221,248
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+
+.Lctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+.byte 102,15,56,221,209
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+.Lctr32_done:
+ .byte 0xf3,0xc3
+.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt
+.type aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+.byte 102,68,15,56,220,249
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+.byte 102,68,15,56,221,249
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.align 16
+.Lxts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+.byte 102,15,56,220,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+.byte 102,15,56,220,241
+ movdqa %xmm15,80(%rsp)
+.byte 102,15,56,220,249
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+
+.align 16
+.Lxts_enc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+.byte 102,15,56,221,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,221,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,221,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_enc_short:
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+.byte 102,15,56,221,209
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_done:
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
+.Lxts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+.byte 102,15,56,221,209
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_enc_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11:
+.byte 102,68,15,56,220,249
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+.byte 102,68,15,56,221,249
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_dec_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+
+.align 16
+.Lxts_dec_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+.byte 102,15,56,222,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+.byte 102,15,56,222,241
+ movdqa %xmm15,80(%rsp)
+.byte 102,15,56,222,249
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+
+.align 16
+.Lxts_dec_loop6:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+.byte 102,15,56,223,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,223,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,223,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_dec_short:
+ addq $96,%rdx
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+
+.align 16
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+.byte 102,15,56,223,209
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_done:
+ andq $15,%r9
+ jz .Lxts_dec_ret
+.Lxts_dec_done2:
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+.byte 102,15,56,223,209
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal:
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+.byte 102,15,56,223,209
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_dec_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz .Lcbc_ret
+
+ movl 240(%rcx),%r10d
+ movq %rcx,%r11
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
+ movups (%r8),%xmm2
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+.Loop_enc1_15:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+.byte 102,15,56,221,209
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8)
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail:
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp .Lcbc_enc_loop
+
+.align 16
+.Lcbc_decrypt:
+ movups (%r8),%xmm9
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail
+ shrl $1,%r10d
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp)
+ jmp .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+ movaps %xmm0,-24(%rsp)
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,222,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter
+
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+.byte 102,15,56,223,209
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+ andq $15,%rdx
+ movups %xmm9,(%r8)
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+ .byte 0xf3,0xc3
+.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.byte 0x48,0x83,0xEC,0x08
+ call __aesni_set_encrypt_key
+ shll $4,%esi
+ testl %eax,%eax
+ jnz .Ldec_key_ret
+ leaq 16(%rdx,%rsi,1),%rdi
+
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
+.Ldec_key_inverse:
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+.byte 102,15,56,219,192
+.byte 102,15,56,219,201
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja .Ldec_key_inverse
+
+ movups (%rdx),%xmm0
+.byte 102,15,56,219,192
+ movups %xmm0,(%rdi)
+.Ldec_key_ret:
+ addq $8,%rsp
+ .byte 0xf3,0xc3
+.LSEH_end_set_decrypt_key:
+.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+.byte 0x48,0x83,0xEC,0x08
+ movq $-1,%rax
+ testq %rdi,%rdi
+ jz .Lenc_key_ret
+ testq %rdx,%rdx
+ jz .Lenc_key_ret
+
+ movups (%rdi),%xmm0
+ xorps %xmm4,%xmm4
+ leaq 16(%rdx),%rax
+ cmpl $256,%esi
+ je .L14rounds
+ cmpl $192,%esi
+ je .L12rounds
+ cmpl $128,%esi
+ jne .Lbad_keybits
+
+.L10rounds:
+ movl $9,%esi
+ movups %xmm0,(%rdx)
+.byte 102,15,58,223,200,1
+ call .Lkey_expansion_128_cold
+.byte 102,15,58,223,200,2
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,4
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,8
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,16
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,32
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,64
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,128
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,27
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,54
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax)
+ movl %esi,80(%rax)
+ xorl %eax,%eax
+ jmp .Lenc_key_ret
+
+.align 16
+.L12rounds:
+ movq 16(%rdi),%xmm2
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+.byte 102,15,58,223,202,1
+ call .Lkey_expansion_192a_cold
+.byte 102,15,58,223,202,2
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,4
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,8
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,16
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,32
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,64
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,128
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.L14rounds:
+ movups 16(%rdi),%xmm2
+ movl $13,%esi
+ leaq 16(%rax),%rax
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+.byte 102,15,58,223,202,1
+ call .Lkey_expansion_256a_cold
+.byte 102,15,58,223,200,1
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,2
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,2
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,4
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,4
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,8
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,8
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,16
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,16
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,32
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,32
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,64
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+ movq $-2,%rax
+.Lenc_key_ret:
+ addq $8,%rsp
+ .byte 0xf3,0xc3
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold:
+ movaps %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm
+
+.align 16
+.Lkey_expansion_256a:
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_256b:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ .byte 0xf3,0xc3
+.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long 6,6,6,0
+.Lincrement64:
+.long 1,0,0,0
+.Lxts_magic:
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
--- /dev/null
+.text
+
+
+
+
+.type _bsaes_encrypt8,@function
+.align 64
+_bsaes_encrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 96(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Lenc_sbox
+.align 16
+.Lenc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Lenc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl .Lenc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7
+ jnz .Lenc_loop
+ movdqa 64(%r11),%xmm7
+ jmp .Lenc_loop
+.align 16
+.Lenc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ .byte 0xf3,0xc3
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type _bsaes_decrypt8,@function
+.align 64
+_bsaes_decrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Ldec_sbox
+.align 16
+.Ldec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Ldec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl .Ldec_done
+
+ pshufd $147,%xmm4,%xmm14
+ movdqa %xmm5,%xmm9
+ pxor %xmm6,%xmm4
+ pxor %xmm6,%xmm5
+ pshufd $147,%xmm15,%xmm7
+ movdqa %xmm6,%xmm12
+ pxor %xmm15,%xmm6
+ pxor %xmm0,%xmm15
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm5,%xmm0
+ pxor %xmm2,%xmm15
+ pxor %xmm3,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm15,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm4
+ pshufd $147,%xmm2,%xmm13
+ movdqa %xmm1,%xmm11
+ pxor %xmm1,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm3
+ pxor %xmm12,%xmm2
+ pxor %xmm9,%xmm3
+ pxor %xmm11,%xmm3
+ pshufd $147,%xmm12,%xmm12
+
+ pxor %xmm4,%xmm6
+ pxor %xmm7,%xmm4
+ pxor %xmm8,%xmm6
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm6
+ pxor %xmm14,%xmm4
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm13,%xmm14
+ pxor %xmm4,%xmm6
+
+ pxor %xmm7,%xmm5
+ pshufd $147,%xmm7,%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+ pxor %xmm9,%xmm15
+ pshufd $147,%xmm8,%xmm8
+ pxor %xmm9,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm14,%xmm15
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm10,%xmm5
+ pxor %xmm10,%xmm1
+ pxor %xmm10,%xmm0
+ pshufd $147,%xmm10,%xmm10
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm3
+ pxor %xmm14,%xmm2
+ pxor %xmm12,%xmm5
+ pxor %xmm11,%xmm0
+ pxor %xmm12,%xmm14
+
+ pxor %xmm14,%xmm3
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm0
+
+ pxor %xmm12,%xmm14
+ pshufd $147,%xmm12,%xmm12
+ pxor %xmm13,%xmm14
+
+
+ pxor %xmm2,%xmm0
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm13,%xmm13
+ pxor %xmm7,%xmm15
+ pxor %xmm12,%xmm2
+ pxor %xmm9,%xmm15
+ pshufd $147,%xmm14,%xmm14
+
+ pxor %xmm6,%xmm5
+ pxor %xmm8,%xmm6
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm5
+ pxor %xmm12,%xmm6
+ pxor %xmm12,%xmm4
+ pxor %xmm14,%xmm6
+ pshufd $147,%xmm7,%xmm7
+ pxor %xmm13,%xmm4
+ pxor %xmm6,%xmm5
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm8,%xmm8
+
+ pxor %xmm14,%xmm2
+ pxor %xmm9,%xmm0
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm10,%xmm13
+ pxor %xmm2,%xmm0
+ pxor %xmm13,%xmm5
+
+ pxor %xmm13,%xmm1
+ pxor %xmm12,%xmm3
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm13,%xmm3
+ pxor %xmm14,%xmm1
+ pxor %xmm10,%xmm13
+
+ pshufd $147,%xmm12,%xmm12
+ pshufd $147,%xmm13,%xmm13
+ pshufd $147,%xmm14,%xmm14
+ pshufd $147,%xmm10,%xmm10
+
+
+ pxor %xmm6,%xmm0
+ pxor %xmm6,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm12,%xmm8
+ pxor %xmm7,%xmm5
+